diff --git a/client/package.json b/client/package.json index 4a90fe0d0..8cf965e99 100644 --- a/client/package.json +++ b/client/package.json @@ -8,7 +8,9 @@ "react": "^17.0.2", "react-bootstrap": "^1.5.2", "react-dom": "^17.0.2", + "react-markdown": "^6.0.2", "react-scripts": "4.0.3", + "remark-gfm": "^1.0.0", "web-vitals": "^1.0.1" }, "scripts": { diff --git a/client/src/Components/ParameterView/DocumentationText.tsx b/client/src/Components/ParameterView/DocumentationText.tsx index 6186274d3..a1ee42991 100644 --- a/client/src/Components/ParameterView/DocumentationText.tsx +++ b/client/src/Components/ParameterView/DocumentationText.tsx @@ -1,22 +1,32 @@ import React, {useState} from "react"; import "./ParameterView.css"; +import ReactMarkdown from "react-markdown"; +import remarkGfm from "remark-gfm"; +import classNames from "classnames"; // @ts-ignore const DocumentationText = ({inputText}) => { - const [readMore,setReadMore] = useState(false); - const moreTextOption = inputText.length > 50 + const shortenedText = inputText.split("\n")[0]; + const hasMultipleLines = shortenedText !== inputText; - const linkName = readMore ? '[Read less]' : '[Read more]' + const [readMore, setReadMore] = useState(false); + + const cssClasses = classNames( + "read-more-button", + { + "pl-1-5rem": !hasMultipleLines, + } + ); return ( -
-

{!readMore && inputText.substr(0, 50)} - {readMore && inputText} - -

+
{ + setReadMore(!readMore) + }}> + {!readMore && hasMultipleLines && "▶"} + {readMore && hasMultipleLines && "▼"} +
); diff --git a/client/src/Components/ParameterView/ParameterNode.tsx b/client/src/Components/ParameterView/ParameterNode.tsx index bcd6f3802..c8a196750 100644 --- a/client/src/Components/ParameterView/ParameterNode.tsx +++ b/client/src/Components/ParameterView/ParameterNode.tsx @@ -8,11 +8,11 @@ type ParameterProps = {inputParameter: PythonParameter} const ParameterNode = ({inputParameter}: ParameterProps) => { - const hasDescription = !!inputParameter.docstring; + const hasDescription = !!inputParameter.description; return (
- +

{inputParameter?.name}

@@ -23,14 +23,14 @@ const ParameterNode = ({inputParameter}: ParameterProps) => { @Enum - +
{ hasDescription && - + } { !hasDescription && -

No Documentation available

+

No Documentation available

}
diff --git a/client/src/Components/ParameterView/ParameterView.css b/client/src/Components/ParameterView/ParameterView.css index 74df6b29d..b3a5dfd67 100644 --- a/client/src/Components/ParameterView/ParameterView.css +++ b/client/src/Components/ParameterView/ParameterView.css @@ -3,14 +3,17 @@ position: -webkit-sticky; position: sticky; top: 0; + overflow-x: hidden; } .parameterViewDiv h2{ padding-top: 1rem; + padding-left: 1rem; } .parameterViewDiv h5{ padding-top: 1rem; + padding-left: 1rem; } .parameterViewDiv a{ @@ -22,12 +25,22 @@ padding-bottom: .5rem; } +.docuTextHidden { + text-overflow: ellipsis; +} + +.docu-paragraph{ + display: flex; + justify-content: flex-start; +} + .parameter-header { + display: flex; + justify-content: flex-start; font-weight: bold; } .parameter-name { - float: left; margin-right: 15px; } @@ -41,15 +54,11 @@ } .read-more-button { + text-align: left; background: none!important; border: none; - /*padding: 0!important;*/ padding-left: 0.5rem; - /*optional*/ font-family: arial, sans-serif; - /*input has OS specific font-family*/ - color: #069; - text-decoration: underline; cursor: pointer; } diff --git a/client/src/Components/TreeView/TreeView.tsx b/client/src/Components/TreeView/TreeView.tsx index 7a440fceb..ce79a2c1a 100644 --- a/client/src/Components/TreeView/TreeView.tsx +++ b/client/src/Components/TreeView/TreeView.tsx @@ -1,7 +1,7 @@ import React from 'react' import Tree from "../Tree/Tree"; import './tree-view.css'; -import packageJson from "../../sklearn.json"; +import packageJson from "../../data/sklearn_new_schema.json"; import PythonPackageBuilder from "../../model/PythonPackageBuilder"; type TreeViewProps = { diff --git a/client/src/data/sklearn.json b/client/src/data/sklearn.json deleted file mode 100644 index 54d099cf6..000000000 --- a/client/src/data/sklearn.json +++ /dev/null @@ -1,127935 +0,0 @@ -{ - "name": "sklearn", - "modules": [ - { - "name": "sklearn.base", - "imports": [ - "import copy", - "import warnings", - 
"from collections import defaultdict", - "import platform", - "import inspect", - "import re", - "import numpy as np", - "from None import __version__", - "from _config import get_config", - "from utils import _IS_32BIT", - "from utils._tags import _DEFAULT_TAGS", - "from utils._tags import _safe_tags", - "from utils.validation import check_X_y", - "from utils.validation import check_array", - "from utils._estimator_html_repr import estimator_html_repr", - "from utils.validation import _deprecate_positional_args", - "from utils._pprint import _EstimatorPrettyPrinter", - "from metrics import accuracy_score", - "from metrics import r2_score" - ], - "classes": [ - { - "name": "BaseEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_get_param_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get parameter names for the estimator" - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get parameters for this estimator.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values." - }, - { - "name": "set_params", - "decorators": [], - "parameters": [ - { - "name": "**params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator parameters." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Set the parameters of this estimator.\n\nThe method works on simple estimators as well as on nested objects\n(such as :class:`~sklearn.pipeline.Pipeline`). The latter have\nparameters of the form ``__`` so that it's\npossible to update each component of a nested object.\n\nParameters\n----------\n**params : dict\n Estimator parameters.\n\nReturns\n-------\nself : estimator instance\n Estimator instance." - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__getstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__setstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_n_features", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - }, - { - "name": "reset", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If True, the `n_features_in_` attribute is set to `X.shape[1]`. If False and the attribute exists, then check that it is equal to `X.shape[1]`. If False and the attribute does *not* exist, then the check is skipped. .. note:: It is recommended to call reset=True in `fit` and in the first call to `partial_fit`. All other methods that validate `X` should set `reset=False`." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set the `n_features_in_` attribute, or check against it.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input samples.\nreset : bool\n If True, the `n_features_in_` attribute is set to `X.shape[1]`.\n If False and the attribute exists, then check that it is equal to\n `X.shape[1]`. If False and the attribute does *not* exist, then\n the check is skipped.\n .. note::\n It is recommended to call reset=True in `fit` and in the first\n call to `partial_fit`. All other methods that validate `X`\n should set `reset=False`." - }, - { - "name": "_validate_data", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "'no_validation'", - "limitation": null, - "ignored": false, - "docstring": "The targets. - If `None`, `check_array` is called on `X`. If the estimator's requires_y tag is True, then an error will be raised. - If `'no_validation'`, `check_array` is called on `X` and the estimator's requires_y tag is ignored. This is a default placeholder and is never meant to be explicitly set. - Otherwise, both `X` and `y` are checked with either `check_array` or `check_X_y` depending on `validate_separately`." - }, - { - "name": "reset", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to reset the `n_features_in_` attribute. If False, the input will be checked for consistency with data provided when reset was last True. .. note:: It is recommended to call reset=True in `fit` and in the first call to `partial_fit`. All other methods that validate `X` should set `reset=False`." 
- }, - { - "name": "validate_separately", - "type": "Union[Literal[False], Tuple[]]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Only used if y is not None. If False, call validate_X_y(). Else, it must be a tuple of kwargs to be used for calling check_array() on X and y respectively." - }, - { - "name": "**check_params", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to :func:`sklearn.utils.check_array` or :func:`sklearn.utils.check_X_y`. Ignored if validate_separately is not False." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate input data and set or check the `n_features_in_` attribute.\n\nParameters\n----------\nX : {array-like, sparse matrix, dataframe} of shape (n_samples, n_features)\n The input samples.\ny : array-like of shape (n_samples,), default='no_validation'\n The targets.\n\n - If `None`, `check_array` is called on `X`. If the estimator's\n requires_y tag is True, then an error will be raised.\n - If `'no_validation'`, `check_array` is called on `X` and the\n estimator's requires_y tag is ignored. This is a default\n placeholder and is never meant to be explicitly set.\n - Otherwise, both `X` and `y` are checked with either `check_array`\n or `check_X_y` depending on `validate_separately`.\n\nreset : bool, default=True\n Whether to reset the `n_features_in_` attribute.\n If False, the input will be checked for consistency with data\n provided when reset was last True.\n .. note::\n It is recommended to call reset=True in `fit` and in the first\n call to `partial_fit`. All other methods that validate `X`\n should set `reset=False`.\nvalidate_separately : False or tuple of dicts, default=False\n Only used if y is not None.\n If False, call validate_X_y(). 
Else, it must be a tuple of kwargs\n to be used for calling check_array() on X and y respectively.\n**check_params : kwargs\n Parameters passed to :func:`sklearn.utils.check_array` or\n :func:`sklearn.utils.check_X_y`. Ignored if validate_separately\n is not False.\n\nReturns\n-------\nout : {ndarray, sparse matrix} or tuple of these\n The validated input. A tuple is returned if `y` is not None." - }, - { - "name": "_repr_html_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "HTML representation of estimator.\n\nThis is redundant with the logic of `_repr_mimebundle_`. The latter\nshould be favorted in the long term, `_repr_html_` is only\nimplemented for consumers who do not interpret `_repr_mimbundle_`." - }, - { - "name": "_repr_html_inner", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "This function is returned by the @property `_repr_html_` to make\n`hasattr(estimator, \"_repr_html_\") return `True` or `False` depending\non `get_config()[\"display\"]`." - }, - { - "name": "_repr_mimebundle_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mime bundle used by jupyter kernels to display estimator" - } - ], - "docstring": "Base class for all estimators in scikit-learn.\n\nNotes\n-----\nAll estimators should specify all the parameters that can be set\nat the class level in their ``__init__`` as explicit keyword\narguments (no ``*args`` or ``**kwargs``)." 
- }, - { - "name": "ClassifierMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels for `X`." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the mean accuracy on the given test data and labels.\n\nIn multi-label classification, this is the subset accuracy\nwhich is a harsh metric since you require for each sample that\neach label set be correctly predicted.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for `X`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Mean accuracy of ``self.predict(X)`` wrt. `y`." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mixin class for all classifiers in scikit-learn." 
- }, - { - "name": "RegressorMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples. For some estimators this may be a precomputed kernel matrix or a list of generic objects instead with shape ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted`` is the number of samples used in the fitting for the estimator." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True values for `X`." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the coefficient of determination :math:`R^2` of the\nprediction.\n\nThe coefficient :math:`R^2` is defined as :math:`(1 - \\frac{u}{v})`,\nwhere :math:`u` is the residual sum of squares ``((y_true - y_pred)\n** 2).sum()`` and :math:`v` is the total sum of squares ``((y_true -\ny_true.mean()) ** 2).sum()``. The best possible score is 1.0 and it\ncan be negative (because the model can be arbitrarily worse). A\nconstant model that always predicts the expected value of `y`,\ndisregarding the input features, would get a :math:`R^2` score of\n0.0.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples. 
For some estimators this may be a precomputed\n kernel matrix or a list of generic objects instead with shape\n ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted``\n is the number of samples used in the fitting for the estimator.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True values for `X`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n :math:`R^2` of ``self.predict(X)`` wrt. `y`.\n\nNotes\n-----\nThe :math:`R^2` score used when calling ``score`` on a regressor uses\n``multioutput='uniform_average'`` from version 0.23 to keep consistent\nwith default value of :func:`~sklearn.metrics.r2_score`.\nThis influences the ``score`` method of all the multioutput\nregressors (except for\n:class:`~sklearn.multioutput.MultiOutputRegressor`)." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mixin class for all regression estimators in scikit-learn." - }, - { - "name": "ClusterMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform clustering on `X` and returns cluster labels.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,), dtype=np.int64\n Cluster labels." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mixin class for all cluster estimators in scikit-learn." - }, - { - "name": "BiclusterMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "biclusters_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convenient way to get row and column indicators together.\n\nReturns the ``rows_`` and ``columns_`` members." - }, - { - "name": "get_indices", - "decorators": [], - "parameters": [ - { - "name": "i", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index of the cluster." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Row and column indices of the `i`'th bicluster.\n\nOnly works if ``rows_`` and ``columns_`` attributes exist.\n\nParameters\n----------\ni : int\n The index of the cluster.\n\nReturns\n-------\nrow_ind : ndarray, dtype=np.intp\n Indices of rows in the dataset that belong to the bicluster.\ncol_ind : ndarray, dtype=np.intp\n Indices of columns in the dataset that belong to the bicluster." 
- }, - { - "name": "get_shape", - "decorators": [], - "parameters": [ - { - "name": "i", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index of the cluster." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Shape of the `i`'th bicluster.\n\nParameters\n----------\ni : int\n The index of the cluster.\n\nReturns\n-------\nn_rows : int\n Number of rows in the bicluster.\n\nn_cols : int\n Number of columns in the bicluster." - }, - { - "name": "get_submatrix", - "decorators": [], - "parameters": [ - { - "name": "i", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index of the cluster." - }, - { - "name": "data", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the submatrix corresponding to bicluster `i`.\n\nParameters\n----------\ni : int\n The index of the cluster.\ndata : array-like of shape (n_samples, n_features)\n The data.\n\nReturns\n-------\nsubmatrix : ndarray of shape (n_rows, n_cols)\n The submatrix corresponding to bicluster `i`.\n\nNotes\n-----\nWorks with sparse matrices. Only works if ``rows_`` and\n``columns_`` attributes exist." - } - ], - "docstring": "Mixin class for all bicluster estimators in scikit-learn." - }, - { - "name": "TransformerMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input samples." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values (None for unsupervised transformations)." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional fit parameters." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit to data, then transform it.\n\nFits transformer to `X` and `y` with optional parameters `fit_params`\nand returns a transformed version of `X`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input samples.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n Target values (None for unsupervised transformations).\n\n**fit_params : dict\n Additional fit parameters.\n\nReturns\n-------\nX_new : ndarray array of shape (n_samples, n_features_new)\n Transformed array." - } - ], - "docstring": "Mixin class for all transformers in scikit-learn." - }, - { - "name": "DensityMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the score of the model on the data `X`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nscore : float" - } - ], - "docstring": "Mixin class for all density estimators in scikit-learn." - }, - { - "name": "OutlierMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform fit on X and returns labels for X.\n\nReturns -1 for outliers and 1 for inliers.\n\nParameters\n----------\nX : {array-like, sparse matrix, dataframe} of shape (n_samples, n_features)\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n 1 for inliers, -1 for outliers." - } - ], - "docstring": "Mixin class for all outlier detection estimators in scikit-learn." 
- }, - { - "name": "MetaEstimatorMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - }, - { - "name": "MultiOutputMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mixin to mark estimators that support multioutput." - }, - { - "name": "_UnstableArchMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mark estimators that are non-determinstic on 32bit or PowerPC" - } - ], - "functions": [ - { - "name": "clone", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": "Union[List, Tuple[], Set]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator or group of estimators to be cloned." - }, - { - "name": "safe", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If safe is False, clone will fall back to a deep copy on objects that are not estimators." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Constructs a new unfitted estimator with the same parameters.\n\nClone does a deep copy of the model in an estimator\nwithout actually copying attached data. 
It yields a new estimator\nwith the same parameters that has not been fitted on any data.\n\nIf the estimator's `random_state` parameter is an integer (or if the\nestimator doesn't have a `random_state` parameter), an *exact clone* is\nreturned: the clone and the original estimator will give the exact same\nresults. Otherwise, *statistical clone* is returned: the clone might\nyield different results from the original estimator. More details can be\nfound in :ref:`randomness`.\n\nParameters\n----------\nestimator : {list, tuple, set} of estimator instance or a single estimator instance\n The estimator or group of estimators to be cloned.\n\nsafe : bool, default=True\n If safe is False, clone will fall back to a deep copy on objects\n that are not estimators." - }, - { - "name": "_pprint", - "decorators": [], - "parameters": [ - { - "name": "params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dictionary to pretty print" - }, - { - "name": "offset", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The offset in characters to add at the begin of each line." 
- }, - { - "name": "printer", - "type": "Callable", - "hasDefault": true, - "default": "repr", - "limitation": null, - "ignored": false, - "docstring": "The function to convert entries to strings, typically the builtin str or repr" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Pretty print the dictionary 'params'\n\nParameters\n----------\nparams : dict\n The dictionary to pretty print\n\noffset : int, default=0\n The offset in characters to add at the begin of each line.\n\nprinter : callable, default=repr\n The function to convert entries to strings, typically\n the builtin str or repr" - }, - { - "name": "is_classifier", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator object to test." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return True if the given estimator is (probably) a classifier.\n\nParameters\n----------\nestimator : object\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if estimator is a classifier and False otherwise." - }, - { - "name": "is_regressor", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator object to test." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return True if the given estimator is (probably) a regressor.\n\nParameters\n----------\nestimator : estimator instance\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if estimator is a regressor and False otherwise." - }, - { - "name": "is_outlier_detector", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator object to test." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return True if the given estimator is (probably) an outlier detector.\n\nParameters\n----------\nestimator : estimator instance\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if estimator is an outlier detector and False otherwise." - }, - { - "name": "_is_pairwise", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator object to test." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns True if estimator is pairwise.\n\n- If the `_pairwise` attribute and the tag are present and consistent,\n then use the value and not issue a warning.\n- If the `_pairwise` attribute and the tag are present and not\n consistent, use the `_pairwise` value and issue a deprecation\n warning.\n- If only the `_pairwise` attribute is present and it is not False,\n issue a deprecation warning and use the `_pairwise` value.\n\nParameters\n----------\nestimator : object\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if the estimator is pairwise and False otherwise." 
- } - ] - }, - { - "name": "sklearn.calibration", - "imports": [ - "import warnings", - "from inspect import signature", - "from contextlib import suppress", - "from functools import partial", - "from math import log", - "import numpy as np", - "from joblib import Parallel", - "from scipy.special import expit", - "from scipy.special import xlogy", - "from scipy.optimize import fmin_bfgs", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import clone", - "from base import MetaEstimatorMixin", - "from preprocessing import label_binarize", - "from preprocessing import LabelEncoder", - "from utils import check_array", - "from utils import column_or_1d", - "from utils import deprecated", - "from utils import indexable", - "from utils.multiclass import check_classification_targets", - "from utils.fixes import delayed", - "from utils.validation import check_is_fitted", - "from utils.validation import check_consistent_length", - "from utils.validation import _check_sample_weight", - "from pipeline import Pipeline", - "from isotonic import IsotonicRegression", - "from svm import LinearSVC", - "from model_selection import check_cv", - "from model_selection import cross_val_predict", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "CalibratedClassifierCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The classifier whose output need to be calibrated to provide more accurate `predict_proba` outputs. The default classifier is a :class:`~sklearn.svm.LinearSVC`." 
- }, - { - "name": "method", - "type": "Literal['sigmoid', 'isotonic']", - "hasDefault": true, - "default": "'sigmoid'", - "limitation": null, - "ignored": false, - "docstring": "The method to use for calibration. Can be 'sigmoid' which corresponds to Platt's method (i.e. a logistic regression model) or 'isotonic' which is a non-parametric approach. It is not advised to use isotonic calibration with too few calibration samples ``(<<1000)`` since it tends to overfit." - }, - { - "name": "cv", - "type": "Union[Literal[\"prefit\"], int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if ``y`` is binary or multiclass, :class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is neither binary nor multiclass, :class:`~sklearn.model_selection.KFold` is used. Refer to the :ref:`User Guide ` for the various cross-validation strategies that can be used here. If \"prefit\" is passed, it is assumed that `base_estimator` has been fitted already and all data is used for calibration. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. Base estimator clones are fitted in parallel across cross-validation iterations. Therefore parallelism happens only when `cv != \"prefit\"`. See :term:`Glossary ` for more details. .. 
versionadded:: 0.24" - }, - { - "name": "ensemble", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Determines how the calibrator is fitted when `cv` is not `'prefit'`. Ignored if `cv='prefit'`. If `True`, the `base_estimator` is fitted using training data and calibrated using testing data, for each `cv` fold. The final estimator is an ensemble of `n_cv` fitted classifer and calibrator pairs, where `n_cv` is the number of cross-validation folds. The output is the average predicted probabilities of all pairs. If `False`, `cv` is used to compute unbiased predictions, via :func:`~sklearn.model_selection.cross_val_predict`, which are then used for calibration. At prediction time, the classifier used is the `base_estimator` trained on all the data. Note that this method is also internally implemented in :mod:`sklearn.svm` estimators with the `probabilities=True` parameter. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the calibrated model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, then samples are equally weighted.\n\nReturns\n-------\nself : object\n Returns an instance of self." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calibrated probabilities of classification.\n\nThis function returns calibrated probabilities of classification\naccording to each class on an array of test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The samples.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n The predicted probas." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the target of new samples. The predicted class is the\nclass that has the highest probability, and can thus be different\nfrom the prediction of the uncalibrated classifier.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The samples.\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n The predicted class." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Probability calibration with isotonic regression or logistic regression.\n\nThis class uses cross-validation to both estimate the parameters of a\nclassifier and subsequently calibrate a classifier. With default\n`ensemble=True`, for each cv split it\nfits a copy of the base estimator to the training subset, and calibrates it\nusing the testing subset. 
For prediction, predicted probabilities are\naveraged across these individual calibrated classifiers. When\n`ensemble=False`, cross-validation is used to obtain unbiased predictions,\nvia :func:`~sklearn.model_selection.cross_val_predict`, which are then\nused for calibration. For prediction, the base estimator, trained using all\nthe data, is used. This is the method implemented when `probabilities=True`\nfor :mod:`sklearn.svm` estimators.\n\nAlready fitted classifiers can be calibrated via the parameter\n`cv=\"prefit\"`. In this case, no cross-validation is used and all provided\ndata is used for calibration. The user has to take care manually that data\nfor model fitting and calibration are disjoint.\n\nThe calibration is based on the :term:`decision_function` method of the\n`base_estimator` if it exists, else on :term:`predict_proba`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nbase_estimator : estimator instance, default=None\n The classifier whose output need to be calibrated to provide more\n accurate `predict_proba` outputs. The default classifier is\n a :class:`~sklearn.svm.LinearSVC`.\n\nmethod : {'sigmoid', 'isotonic'}, default='sigmoid'\n The method to use for calibration. Can be 'sigmoid' which\n corresponds to Platt's method (i.e. a logistic regression model) or\n 'isotonic' which is a non-parametric approach. It is not advised to\n use isotonic calibration with too few calibration samples\n ``(<<1000)`` since it tends to overfit.\n\ncv : int, cross-validation generator, iterable or \"prefit\", default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if ``y`` is binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used. 
If ``y`` is\n neither binary nor multiclass, :class:`~sklearn.model_selection.KFold`\n is used.\n\n Refer to the :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n If \"prefit\" is passed, it is assumed that `base_estimator` has been\n fitted already and all data is used for calibration.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors.\n\n Base estimator clones are fitted in parallel across cross-validation\n iterations. Therefore parallelism happens only when `cv != \"prefit\"`.\n\n See :term:`Glossary ` for more details.\n\n .. versionadded:: 0.24\n\nensemble : bool, default=True\n Determines how the calibrator is fitted when `cv` is not `'prefit'`.\n Ignored if `cv='prefit'`.\n\n If `True`, the `base_estimator` is fitted using training data and\n calibrated using testing data, for each `cv` fold. The final estimator\n is an ensemble of `n_cv` fitted classifer and calibrator pairs, where\n `n_cv` is the number of cross-validation folds. The output is the\n average predicted probabilities of all pairs.\n\n If `False`, `cv` is used to compute unbiased predictions, via\n :func:`~sklearn.model_selection.cross_val_predict`, which are then\n used for calibration. At prediction time, the classifier used is the\n `base_estimator` trained on all the data.\n Note that this method is also internally implemented in\n :mod:`sklearn.svm` estimators with the `probabilities=True` parameter.\n\n .. 
versionadded:: 0.24\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n The class labels.\n\ncalibrated_classifiers_ : list (len() equal to cv or 1 if `cv=\"prefit\"` or `ensemble=False`)\n The list of classifier and calibrator pairs.\n\n - When `cv=\"prefit\"`, the fitted `base_estimator` and fitted\n calibrator.\n - When `cv` is not \"prefit\" and `ensemble=True`, `n_cv` fitted\n `base_estimator` and calibrator pairs. `n_cv` is the number of\n cross-validation folds.\n - When `cv` is not \"prefit\" and `ensemble=False`, the `base_estimator`,\n fitted on all the data, and fitted calibrator.\n\n .. versionchanged:: 0.24\n Single calibrated classifier case when `ensemble=False`.\n\nExamples\n--------\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.naive_bayes import GaussianNB\n>>> from sklearn.calibration import CalibratedClassifierCV\n>>> X, y = make_classification(n_samples=100, n_features=2,\n... n_redundant=0, random_state=42)\n>>> base_clf = GaussianNB()\n>>> calibrated_clf = CalibratedClassifierCV(base_estimator=base_clf, cv=3)\n>>> calibrated_clf.fit(X, y)\nCalibratedClassifierCV(base_estimator=GaussianNB(), cv=3)\n>>> len(calibrated_clf.calibrated_classifiers_)\n3\n>>> calibrated_clf.predict_proba(X)[:5, :]\narray([[0.110..., 0.889...],\n [0.072..., 0.927...],\n [0.928..., 0.071...],\n [0.928..., 0.071...],\n [0.071..., 0.928...]])\n\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_classification(n_samples=100, n_features=2,\n... n_redundant=0, random_state=42)\n>>> X_train, X_calib, y_train, y_calib = train_test_split(\n... X, y, random_state=42\n... )\n>>> base_clf = GaussianNB()\n>>> base_clf.fit(X_train, y_train)\nGaussianNB()\n>>> calibrated_clf = CalibratedClassifierCV(\n... base_estimator=base_clf,\n... cv=\"prefit\"\n... 
)\n>>> calibrated_clf.fit(X_calib, y_calib)\nCalibratedClassifierCV(base_estimator=GaussianNB(), cv='prefit')\n>>> len(calibrated_clf.calibrated_classifiers_)\n1\n>>> calibrated_clf.predict_proba([[-0.5, 0.5]])\narray([[0.936..., 0.063...]])\n\nReferences\n----------\n.. [1] Obtaining calibrated probability estimates from decision trees\n and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001\n\n.. [2] Transforming Classifier Scores into Accurate Multiclass\n Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002)\n\n.. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to\n Regularized Likelihood Methods, J. Platt, (1999)\n\n.. [4] Predicting Good Probabilities with Supervised Learning,\n A. Niculescu-Mizil & R. Caruana, ICML 2005" - }, - { - "name": "_CalibratedClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fitted classifier." - }, - { - "name": "calibrators", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of fitted calibrators (either 'IsotonicRegression' or '_SigmoidCalibration'). The number of calibrators equals the number of classes. However, if there are 2 classes, the list contains only one fitted calibrator." - }, - { - "name": "classes", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "All the prediction classes." - }, - { - "name": "method", - "type": "Literal['sigmoid', 'isotonic']", - "hasDefault": true, - "default": "'sigmoid'", - "limitation": null, - "ignored": false, - "docstring": "The method to use for calibration. 
Can be 'sigmoid' which corresponds to Platt's method or 'isotonic' which is a non-parametric approach based on isotonic regression." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "calibrators_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sample data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate calibrated probabilities.\n\nCalculates classification calibrated probabilities\nfor each class, in a one-vs-all manner, for `X`.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The sample data.\n\nReturns\n-------\nproba : array, shape (n_samples, n_classes)\n The predicted probabilities. Can be exact zeros." - } - ], - "docstring": "Pipeline-like chaining a fitted classifier and its fitted calibrators.\n\nParameters\n----------\nbase_estimator : estimator instance\n Fitted classifier.\n\ncalibrators : list of fitted estimator instances\n List of fitted calibrators (either 'IsotonicRegression' or\n '_SigmoidCalibration'). The number of calibrators equals the number of\n classes. However, if there are 2 classes, the list contains only one\n fitted calibrator.\n\nclasses : array-like of shape (n_classes,)\n All the prediction classes.\n\nmethod : {'sigmoid', 'isotonic'}, default='sigmoid'\n The method to use for calibration. Can be 'sigmoid' which\n corresponds to Platt's method or 'isotonic' which is a\n non-parametric approach based on isotonic regression.\n\nAttributes\n----------\ncalibrators_ : list of fitted estimator instances\n Same as `calibrators`. Exposed for backward-compatibility. Use\n `calibrators` instead.\n\n .. 
deprecated:: 0.24\n `calibrators_` is deprecated from 0.24 and will be removed in\n 1.1 (renaming of 0.26). Use `calibrators` instead." - }, - { - "name": "_SigmoidCalibration", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training target." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples,)\n Training data.\n\ny : array-like of shape (n_samples,)\n Training target.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\nself : object\n Returns an instance of self." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "T", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to predict from." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict new data by linear interpolation.\n\nParameters\n----------\nT : array-like of shape (n_samples,)\n Data to predict from.\n\nReturns\n-------\nT_ : ndarray of shape (n_samples,)\n The predicted data." 
- } - ], - "docstring": "Sigmoid regression model.\n\nAttributes\n----------\na_ : float\n The slope.\n\nb_ : float\n The intercept." - } - ], - "functions": [ - { - "name": "_fit_classifier_calibrator_pair", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Cloned base estimator." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets." - }, - { - "name": "train", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of the training subset." - }, - { - "name": "test", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of the testing subset." - }, - { - "name": "supports_sw", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether or not the `estimator` supports sample weights." - }, - { - "name": "method", - "type": "Literal['sigmoid', 'isotonic']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Method to use for calibration." - }, - { - "name": "classes", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target classes." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights for `X`." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit a classifier/calibration pair on a given train/test split.\n\nFit the classifier on the train set, compute its predictions on the test\nset and use the predictions as input to fit the calibrator along with the\ntest labels.\n\nParameters\n----------\nestimator : estimator instance\n Cloned base estimator.\n\nX : array-like, shape (n_samples, n_features)\n Sample data.\n\ny : array-like, shape (n_samples,)\n Targets.\n\ntrain : ndarray, shape (n_train_indicies,)\n Indices of the training subset.\n\ntest : ndarray, shape (n_test_indicies,)\n Indices of the testing subset.\n\nsupports_sw : bool\n Whether or not the `estimator` supports sample weights.\n\nmethod : {'sigmoid', 'isotonic'}\n Method to use for calibration.\n\nclasses : ndarray, shape (n_classes,)\n The target classes.\n\nsample_weight : array-like, default=None\n Sample weights for `X`.\n\nReturns\n-------\ncalibrated_classifier : _CalibratedClassifier instance" - }, - { - "name": "_get_prediction_method", - "decorators": [], - "parameters": [ - { - "name": "clf", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fitted classifier to obtain the prediction method from." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return prediction method.\n\n`decision_function` method of `clf` returned, if it\nexists, otherwise `predict_proba` method returned.\n\nParameters\n----------\nclf : Estimator instance\n Fitted classifier to obtain the prediction method from.\n\nReturns\n-------\nprediction_method : callable\n The prediction method." - }, - { - "name": "_compute_predictions", - "decorators": [], - "parameters": [ - { - "name": "pred_method", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prediction method." 
- }, - { - "name": "X", - "type": "Optional[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data used to obtain predictions." - }, - { - "name": "n_classes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of classes present." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return predictions for `X` and reshape binary outputs to shape\n(n_samples, 1).\n\nParameters\n----------\npred_method : callable\n Prediction method.\n\nX : array-like or None\n Data used to obtain predictions.\n\nn_classes : int\n Number of classes present.\n\nReturns\n-------\npredictions : array-like, shape (X.shape[0], len(clf.classes_))\n The predictions. Note if there are 2 classes, array is of shape\n (X.shape[0], 1)." - }, - { - "name": "_fit_calibrator", - "decorators": [], - "parameters": [ - { - "name": "clf", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fitted classifier." - }, - { - "name": "predictions", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Raw predictions returned by the un-calibrated base classifier." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The targets." - }, - { - "name": "classes", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "All the prediction classes." - }, - { - "name": "method", - "type": "Literal['sigmoid', 'isotonic']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The method to use for calibration." 
- }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit calibrator(s) and return a `_CalibratedClassifier`\ninstance.\n\n`n_classes` (i.e. `len(clf.classes_)`) calibrators are fitted.\nHowever, if `n_classes` equals 2, one calibrator is fitted.\n\nParameters\n----------\nclf : estimator instance\n Fitted classifier.\n\npredictions : array-like, shape (n_samples, n_classes) or (n_samples, 1) when binary.\n Raw predictions returned by the un-calibrated base classifier.\n\ny : array-like, shape (n_samples,)\n The targets.\n\nclasses : ndarray, shape (n_classes,)\n All the prediction classes.\n\nmethod : {'sigmoid', 'isotonic'}\n The method to use for calibration.\n\nsample_weight : ndarray, shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\npipeline : _CalibratedClassifier instance" - }, - { - "name": "_sigmoid_calibration", - "decorators": [], - "parameters": [ - { - "name": "predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The decision function or predict proba for the samples." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The targets." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Probability Calibration with sigmoid method (Platt 2000)\n\nParameters\n----------\npredictions : ndarray of shape (n_samples,)\n The decision function or predict proba for the samples.\n\ny : ndarray of shape (n_samples,)\n The targets.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\na : float\n The slope.\n\nb : float\n The intercept.\n\nReferences\n----------\nPlatt, \"Probabilistic Outputs for Support Vector Machines\"" - }, - { - "name": "calibration_curve", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True targets." - }, - { - "name": "y_prob", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Probabilities of the positive class." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether y_prob needs to be normalized into the [0, 1] interval, i.e. is not a proper probability. If True, the smallest value in y_prob is linearly mapped onto 0 and the largest one onto 1." - }, - { - "name": "n_bins", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of bins to discretize the [0, 1] interval. A bigger number requires more data. Bins with no samples (i.e. without corresponding values in `y_prob`) will not be returned, thus the returned arrays may have less than `n_bins` values." - }, - { - "name": "strategy", - "type": "Literal['uniform', 'quantile']", - "hasDefault": true, - "default": "'uniform'", - "limitation": null, - "ignored": false, - "docstring": "Strategy used to define the widths of the bins. 
uniform The bins have identical widths. quantile The bins have the same number of samples and depend on `y_prob`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute true and predicted probabilities for a calibration curve.\n\nThe method assumes the inputs come from a binary classifier, and\ndiscretize the [0, 1] interval into bins.\n\nCalibration curves may also be referred to as reliability diagrams.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n True targets.\n\ny_prob : array-like of shape (n_samples,)\n Probabilities of the positive class.\n\nnormalize : bool, default=False\n Whether y_prob needs to be normalized into the [0, 1] interval, i.e.\n is not a proper probability. If True, the smallest value in y_prob\n is linearly mapped onto 0 and the largest one onto 1.\n\nn_bins : int, default=5\n Number of bins to discretize the [0, 1] interval. A bigger number\n requires more data. Bins with no samples (i.e. 
without\n corresponding values in `y_prob`) will not be returned, thus the\n returned arrays may have less than `n_bins` values.\n\nstrategy : {'uniform', 'quantile'}, default='uniform'\n Strategy used to define the widths of the bins.\n\n uniform\n The bins have identical widths.\n quantile\n The bins have the same number of samples and depend on `y_prob`.\n\nReturns\n-------\nprob_true : ndarray of shape (n_bins,) or smaller\n The proportion of samples whose class is the positive class, in each\n bin (fraction of positives).\n\nprob_pred : ndarray of shape (n_bins,) or smaller\n The mean predicted probability in each bin.\n\nReferences\n----------\nAlexandru Niculescu-Mizil and Rich Caruana (2005) Predicting Good\nProbabilities With Supervised Learning, in Proceedings of the 22nd\nInternational Conference on Machine Learning (ICML).\nSee section 4 (Qualitative Analysis of Predictions).\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.calibration import calibration_curve\n>>> y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1])\n>>> y_pred = np.array([0.1, 0.2, 0.3, 0.4, 0.65, 0.7, 0.8, 0.9, 1.])\n>>> prob_true, prob_pred = calibration_curve(y_true, y_pred, n_bins=3)\n>>> prob_true\narray([0. , 0.5, 1. 
])\n>>> prob_pred\narray([0.2 , 0.525, 0.85 ])" - } - ] - }, - { - "name": "sklearn.conftest", - "imports": [ - "import os", - "from os import environ", - "from functools import wraps", - "import pytest", - "from threadpoolctl import threadpool_limits", - "from sklearn.utils._openmp_helpers import _openmp_effective_n_threads", - "from sklearn.datasets import fetch_20newsgroups", - "from sklearn.datasets import fetch_20newsgroups_vectorized", - "from sklearn.datasets import fetch_california_housing", - "from sklearn.datasets import fetch_covtype", - "from sklearn.datasets import fetch_kddcup99", - "from sklearn.datasets import fetch_olivetti_faces", - "from sklearn.datasets import fetch_rcv1" - ], - "classes": [], - "functions": [ - { - "name": "_fetch_fixture", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fetch dataset (download if missing and requested by environment)." - }, - { - "name": "pytest_collection_modifyitems", - "decorators": [], - "parameters": [ - { - "name": "config", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "items", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Called after collect is completed.\n\nParameters\n----------\nconfig : pytest config\nitems : list of collected items" - }, - { - "name": "pyplot", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Setup and teardown fixture for matplotlib.\n\nThis fixture checks if we can import matplotlib. If not, the tests will be\nskipped. Otherwise, we setup matplotlib backend and close the figures\nafter running the functions.\n\nReturns\n-------\npyplot : module\n The ``matplotlib.pyplot`` module." 
- }, - { - "name": "pytest_runtest_setup", - "decorators": [], - "parameters": [ - { - "name": "item", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "item to be processed" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set the number of openmp threads based on the number of workers\nxdist is using to prevent oversubscription.\n\nParameters\n----------\nitem : pytest item\n item to be processed" - } - ] - }, - { - "name": "sklearn.discriminant_analysis", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy import linalg", - "from scipy.special import expit", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from base import ClassifierMixin", - "from linear_model._base import LinearClassifierMixin", - "from covariance import ledoit_wolf", - "from covariance import empirical_covariance", - "from covariance import shrunk_covariance", - "from utils.multiclass import unique_labels", - "from utils import check_array", - "from utils.validation import check_is_fitted", - "from utils.multiclass import check_classification_targets", - "from utils.extmath import softmax", - "from preprocessing import StandardScaler", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "LinearDiscriminantAnalysis", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "solver", - "type": "Literal['svd', 'lsqr', 'eigen']", - "hasDefault": true, - "default": "'svd'", - "limitation": null, - "ignored": false, - "docstring": "Solver to use, possible values: - 'svd': Singular value decomposition (default). Does not compute the covariance matrix, therefore this solver is recommended for data with a large number of features. - 'lsqr': Least squares solution. Can be combined with shrinkage or custom covariance estimator. 
- 'eigen': Eigenvalue decomposition. Can be combined with shrinkage or custom covariance estimator." - }, - { - "name": "shrinkage", - "type": "Union[Literal['auto'], float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Shrinkage parameter, possible values: - None: no shrinkage (default). - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. - float between 0 and 1: fixed shrinkage parameter. This should be left to None if `covariance_estimator` is used. Note that shrinkage works only with 'lsqr' and 'eigen' solvers." - }, - { - "name": "priors", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class prior probabilities. By default, the class proportions are inferred from the training data." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components (<= min(n_classes - 1, n_features)) for dimensionality reduction. If None, will be set to min(n_classes - 1, n_features). This parameter only affects the `transform` method." - }, - { - "name": "store_covariance", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, explicitely compute the weighted within-class covariance matrix when solver is 'svd'. The matrix is always computed and stored for the other solvers. .. versionadded:: 0.17" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Absolute threshold for a singular value of X to be considered significant, used to estimate the rank of X. Dimensions whose singular values are non-significant are discarded. Only used if solver is 'svd'. .. 
versionadded:: 0.17" - }, - { - "name": "covariance_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, `covariance_estimator` is used to estimate the covariance matrices instead of relying on the empirical covariance estimator (with potential shrinkage). The object should have a fit method and a ``covariance_`` attribute like the estimators in :mod:`sklearn.covariance`. if None the shrinkage parameter drives the estimate. This should be left to None if `shrinkage` is used. Note that `covariance_estimator` works only with 'lsqr' and 'eigen' solvers. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_solve_lsqr", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "shrinkage", - "type": "Optional[Union[Literal['auto'], float]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Shrinkage parameter, possible values: - None: no shrinkage. - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. - float between 0 and 1: fixed shrinkage parameter. Shrinkage parameter is ignored if `covariance_estimator` i not None" - }, - { - "name": "covariance_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, `covariance_estimator` is used to estimate the covariance matrices instead of relying the empirical covariance estimator (with potential shrinkage). 
The object should have a fit method and a ``covariance_`` attribute like the estimators in sklearn.covariance. if None the shrinkage parameter drives the estimate. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Least squares solver.\n\nThe least squares solver computes a straightforward solution of the\noptimal decision rule based directly on the discriminant functions. It\ncan only be used for classification (with any covariance estimator),\nbecause\nestimation of eigenvectors is not performed. Therefore, dimensionality\nreduction with the transform is not supported.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_classes)\n Target values.\n\nshrinkage : 'auto', float or None\n Shrinkage parameter, possible values:\n - None: no shrinkage.\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator` i\n not None\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in sklearn.covariance.\n if None the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\nNotes\n-----\nThis solver is based on [1]_, section 2.6.2, pp. 39-41.\n\nReferences\n----------\n.. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN\n 0-471-05669-3." - }, - { - "name": "_solve_eigen", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "shrinkage", - "type": "Optional[Union[Literal['auto'], float]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Shrinkage parameter, possible values: - None: no shrinkage. - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. - float between 0 and 1: fixed shrinkage constant. Shrinkage parameter is ignored if `covariance_estimator` i not None" - }, - { - "name": "covariance_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, `covariance_estimator` is used to estimate the covariance matrices instead of relying the empirical covariance estimator (with potential shrinkage). The object should have a fit method and a ``covariance_`` attribute like the estimators in sklearn.covariance. if None the shrinkage parameter drives the estimate. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Eigenvalue solver.\n\nThe eigenvalue solver computes the optimal solution of the Rayleigh\ncoefficient (basically the ratio of between class scatter to within\nclass scatter). 
This solver supports both classification and\ndimensionality reduction (with any covariance estimator).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\nshrinkage : 'auto', float or None\n Shrinkage parameter, possible values:\n - None: no shrinkage.\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage constant.\n\n Shrinkage parameter is ignored if `covariance_estimator` i\n not None\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in sklearn.covariance.\n if None the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\nNotes\n-----\nThis solver is based on [1]_, section 3.8.3, pp. 121-124.\n\nReferences\n----------\n.. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN\n 0-471-05669-3." - }, - { - "name": "_solve_svd", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "SVD solver.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values." 
- }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit LinearDiscriminantAnalysis model according to the given\n training data and parameters.\n\n .. versionchanged:: 0.19\n *store_covariance* has been moved to main constructor.\n\n .. versionchanged:: 0.19\n *tol* has been moved to main constructor.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Project data to maximize class separation.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Transformed data." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate probability.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n Estimated probabilities." 
- }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate log probability.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n Estimated log probabilities." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of samples (test vectors)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply decision function to an array of samples.\n\nThe decision function is equal (up to a constant factor) to the\nlog-posterior of the model, i.e. `log p(y = k | x)`. In a binary\nclassification setting this instead corresponds to the difference\n`log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Array of samples (test vectors).\n\nReturns\n-------\nC : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Decision function values related to each class, per sample.\n In the two-class case, the shape is (n_samples,), giving the\n log likelihood ratio of the positive class." 
- } - ], - "docstring": "Linear Discriminant Analysis\n\nA classifier with a linear decision boundary, generated by fitting class\nconditional densities to the data and using Bayes' rule.\n\nThe model fits a Gaussian density to each class, assuming that all classes\nshare the same covariance matrix.\n\nThe fitted model can also be used to reduce the dimensionality of the input\nby projecting it to the most discriminative directions, using the\n`transform` method.\n\n.. versionadded:: 0.17\n *LinearDiscriminantAnalysis*.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nsolver : {'svd', 'lsqr', 'eigen'}, default='svd'\n Solver to use, possible values:\n - 'svd': Singular value decomposition (default).\n Does not compute the covariance matrix, therefore this solver is\n recommended for data with a large number of features.\n - 'lsqr': Least squares solution.\n Can be combined with shrinkage or custom covariance estimator.\n - 'eigen': Eigenvalue decomposition.\n Can be combined with shrinkage or custom covariance estimator.\n\nshrinkage : 'auto' or float, default=None\n Shrinkage parameter, possible values:\n - None: no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n This should be left to None if `covariance_estimator` is used.\n Note that shrinkage works only with 'lsqr' and 'eigen' solvers.\n\npriors : array-like of shape (n_classes,), default=None\n The class prior probabilities. By default, the class proportions are\n inferred from the training data.\n\nn_components : int, default=None\n Number of components (<= min(n_classes - 1, n_features)) for\n dimensionality reduction. If None, will be set to\n min(n_classes - 1, n_features). This parameter only affects the\n `transform` method.\n\nstore_covariance : bool, default=False\n If True, explicitely compute the weighted within-class covariance\n matrix when solver is 'svd'. 
The matrix is always computed\n and stored for the other solvers.\n\n .. versionadded:: 0.17\n\ntol : float, default=1.0e-4\n Absolute threshold for a singular value of X to be considered\n significant, used to estimate the rank of X. Dimensions whose\n singular values are non-significant are discarded. Only used if\n solver is 'svd'.\n\n .. versionadded:: 0.17\n\ncovariance_estimator : covariance estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying on the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in :mod:`sklearn.covariance`.\n if None the shrinkage parameter drives the estimate.\n\n This should be left to None if `shrinkage` is used.\n Note that `covariance_estimator` works only with 'lsqr' and 'eigen'\n solvers.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_classes, n_features)\n Weight vector(s).\n\nintercept_ : ndarray of shape (n_classes,)\n Intercept term.\n\ncovariance_ : array-like of shape (n_features, n_features)\n Weighted within-class covariance matrix. It corresponds to\n `sum_k prior_k * C_k` where `C_k` is the covariance matrix of the\n samples in class `k`. The `C_k` are estimated using the (potentially\n shrunk) biased estimator of covariance. If solver is 'svd', only\n exists when `store_covariance` is True.\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n If ``n_components`` is not set then all components are stored and the\n sum of explained variances is equal to 1.0. 
Only available when eigen\n or svd solver is used.\n\nmeans_ : array-like of shape (n_classes, n_features)\n Class-wise means.\n\npriors_ : array-like of shape (n_classes,)\n Class priors (sum to 1).\n\nscalings_ : array-like of shape (rank, n_classes - 1)\n Scaling of the features in the space spanned by the class centroids.\n Only available for 'svd' and 'eigen' solvers.\n\nxbar_ : array-like of shape (n_features,)\n Overall mean. Only present if solver is 'svd'.\n\nclasses_ : array-like of shape (n_classes,)\n Unique class labels.\n\nSee Also\n--------\nQuadraticDiscriminantAnalysis : Quadratic Discriminant Analysis.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = LinearDiscriminantAnalysis()\n>>> clf.fit(X, y)\nLinearDiscriminantAnalysis()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]" - }, - { - "name": "QuadraticDiscriminantAnalysis", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "priors", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Class priors. By default, the class proportions are inferred from the training data." - }, - { - "name": "reg_param", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Regularizes the per-class covariance estimates by transforming S2 as ``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``, where S2 corresponds to the `scaling_` attribute of a given class." 
- }, - { - "name": "store_covariance", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the class covariance matrices are explicitely computed and stored in the `self.covariance_` attribute. .. versionadded:: 0.17" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Absolute threshold for a singular value to be considered significant, used to estimate the rank of `Xk` where `Xk` is the centered matrix of samples in class k. This parameter does not affect the predictions. It only controls a warning that is raised when features are considered to be colinear. .. versionadded:: 0.17" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values (integers)" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model according to the given training data and parameters.\n\n .. versionchanged:: 0.19\n ``store_covariances`` has been moved to main constructor as\n ``store_covariance``\n\n .. 
versionchanged:: 0.19\n ``tol`` has been moved to main constructor.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values (integers)" - }, - { - "name": "_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of samples (test vectors)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply decision function to an array of samples.\n\nThe decision function is equal (up to a constant factor) to the\nlog-posterior of the model, i.e. `log p(y = k | x)`. In a binary\nclassification setting this instead corresponds to the difference\n`log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Array of samples (test vectors).\n\nReturns\n-------\nC : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Decision function values related to each class, per sample.\n In the two-class case, the shape is (n_samples,), giving the\n log likelihood ratio of the positive class." 
- }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on an array of test vectors X.\n\nThe predicted class C for each sample in X is returned.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : ndarray of shape (n_samples,)" - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of samples/test vectors." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return posterior probabilities of classification.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Array of samples/test vectors.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n Posterior probabilities of classification per class." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of samples/test vectors." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return log of posterior probabilities of classification.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Array of samples/test vectors.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n Posterior log-probabilities of classification per class." 
- } - ], - "docstring": "Quadratic Discriminant Analysis\n\nA classifier with a quadratic decision boundary, generated\nby fitting class conditional densities to the data\nand using Bayes' rule.\n\nThe model fits a Gaussian density to each class.\n\n.. versionadded:: 0.17\n *QuadraticDiscriminantAnalysis*\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npriors : ndarray of shape (n_classes,), default=None\n Class priors. By default, the class proportions are inferred from the\n training data.\n\nreg_param : float, default=0.0\n Regularizes the per-class covariance estimates by transforming S2 as\n ``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``,\n where S2 corresponds to the `scaling_` attribute of a given class.\n\nstore_covariance : bool, default=False\n If True, the class covariance matrices are explicitely computed and\n stored in the `self.covariance_` attribute.\n\n .. versionadded:: 0.17\n\ntol : float, default=1.0e-4\n Absolute threshold for a singular value to be considered significant,\n used to estimate the rank of `Xk` where `Xk` is the centered matrix\n of samples in class k. This parameter does not affect the\n predictions. It only controls a warning that is raised when features\n are considered to be colinear.\n\n .. versionadded:: 0.17\n\nAttributes\n----------\ncovariance_ : list of len n_classes of ndarray of shape (n_features, n_features)\n For each class, gives the covariance matrix estimated using the\n samples of that class. The estimations are unbiased. Only present if\n `store_covariance` is True.\n\nmeans_ : array-like of shape (n_classes, n_features)\n Class-wise means.\n\npriors_ : array-like of shape (n_classes,)\n Class priors (sum to 1).\n\nrotations_ : list of len n_classes of ndarray of shape (n_features, n_k)\n For each class k an array of shape (n_features, n_k), where\n ``n_k = min(n_features, number of elements in class k)``\n It is the rotation of the Gaussian distribution, i.e. 
its\n principal axis. It corresponds to `V`, the matrix of eigenvectors\n coming from the SVD of `Xk = U S Vt` where `Xk` is the centered\n matrix of samples from class k.\n\nscalings_ : list of len n_classes of ndarray of shape (n_k,)\n For each class, contains the scaling of\n the Gaussian distributions along its principal axes, i.e. the\n variance in the rotated coordinate system. It corresponds to `S^2 /\n (n_samples - 1)`, where `S` is the diagonal matrix of singular values\n from the SVD of `Xk`, where `Xk` is the centered matrix of samples\n from class k.\n\nclasses_ : ndarray of shape (n_classes,)\n Unique class labels.\n\nExamples\n--------\n>>> from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = QuadraticDiscriminantAnalysis()\n>>> clf.fit(X, y)\nQuadraticDiscriminantAnalysis()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n\nSee Also\n--------\nLinearDiscriminantAnalysis : Linear Discriminant Analysis." 
- } - ], - "functions": [ - { - "name": "_cov", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate covariance matrix (using optional covariance_estimator).\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nshrinkage : {'empirical', 'auto'} or float, default=None\n Shrinkage parameter, possible values:\n - None or 'empirical': no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator`\n is not None.\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying on the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in :mod:`sklearn.covariance``.\n if None the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ns : ndarray of shape (n_features, n_features)\n Estimated covariance matrix." - }, - { - "name": "_class_means", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute class means.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\nReturns\n-------\nmeans : array-like of shape (n_classes, n_features)\n Class means." 
- }, - { - "name": "_class_cov", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "priors", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Class priors." - }, - { - "name": "shrinkage", - "type": "Union[Literal['auto'], float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Shrinkage parameter, possible values: - None: no shrinkage (default). - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. - float between 0 and 1: fixed shrinkage parameter. Shrinkage parameter is ignored if `covariance_estimator` is not None." - }, - { - "name": "covariance_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, `covariance_estimator` is used to estimate the covariance matrices instead of relying the empirical covariance estimator (with potential shrinkage). The object should have a fit method and a ``covariance_`` attribute like the estimators in sklearn.covariance. If None, the shrinkage parameter drives the estimate. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute weighted within-class covariance matrix.\n\nThe per-class covariance are weighted by the class priors.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\npriors : array-like of shape (n_classes,)\n Class priors.\n\nshrinkage : 'auto' or float, default=None\n Shrinkage parameter, possible values:\n - None: no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator` is not None.\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in sklearn.covariance.\n If None, the shrinkage parameter drives the estimate.\n\n .. 
versionadded:: 0.24\n\nReturns\n-------\ncov : array-like of shape (n_features, n_features)\n Weighted within-class covariance matrix" - } - ] - }, - { - "name": "sklearn.dummy", - "imports": [ - "import warnings", - "import numpy as np", - "import scipy.sparse as sp", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import MultiOutputMixin", - "from utils import check_random_state", - "from utils.validation import _num_samples", - "from utils.validation import check_array", - "from utils.validation import check_consistent_length", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.random import _random_choice_csc", - "from utils.stats import _weighted_percentile", - "from utils.multiclass import class_distribution", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "DummyClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "strategy", - "type": "Literal[\"stratified\", \"most_frequent\", \"prior\", \"uniform\", \"constant\"]", - "hasDefault": true, - "default": "\"prior\"", - "limitation": null, - "ignored": false, - "docstring": "Strategy to use to generate predictions. * \"stratified\": generates predictions by respecting the training set's class distribution. * \"most_frequent\": always predicts the most frequent label in the training set. * \"prior\": always predicts the class that maximizes the class prior (like \"most_frequent\") and ``predict_proba`` returns the class prior. * \"uniform\": generates predictions uniformly at random. * \"constant\": always predicts a constant label that is provided by the user. This is useful for metrics that evaluate a non-majority class .. versionchanged:: 0.24 The default value of `strategy` has changed to \"prior\" in version 0.24." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness to generate the predictions when ``strategy='stratified'`` or ``strategy='uniform'``. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "constant", - "type": "Union[str, int, ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The explicit constant as predicted by the \"constant\" strategy. This parameter is useful only for the \"constant\" strategy." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the random classifier.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nself : object" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test data.\n\nReturns\n-------\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Predicted target values for X." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return probability estimates for the test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test data.\n\nReturns\n-------\nP : ndarray of shape (n_samples, n_classes) or list of such arrays\n Returns the probability of the sample for each class in\n the model, where classes are ordered arithmetically, for each\n output." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, requires length = n_samples" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return log probability estimates for the test vectors X.\n\nParameters\n----------\nX : {array-like, object with finite length or shape}\n Training data, requires length = n_samples\n\nReturns\n-------\nP : ndarray of shape (n_samples, n_classes) or list of such arrays\n Returns the log probability of the sample for each class in\n the model, where classes are ordered arithmetically for each\n output." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Optional[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples. Passing None as test samples gives the same result as passing real test samples, since DummyClassifier operates independently of the sampled observations." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels for X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the mean accuracy on the given test data and labels.\n\nIn multi-label classification, this is the subset accuracy\nwhich is a harsh metric since you require for each sample that\neach label set be correctly predicted.\n\nParameters\n----------\nX : None or array-like of shape (n_samples, n_features)\n Test samples. Passing None as test samples gives the same result\n as passing real test samples, since DummyClassifier\n operates independently of the sampled observations.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Mean accuracy of self.predict(X) wrt. y." - } - ], - "docstring": "DummyClassifier is a classifier that makes predictions using simple rules.\n\nThis classifier is useful as a simple baseline to compare with other\n(real) classifiers. Do not use it for real problems.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.13\n\nParameters\n----------\nstrategy : {\"stratified\", \"most_frequent\", \"prior\", \"uniform\", \"constant\"}, default=\"prior\"\n Strategy to use to generate predictions.\n\n * \"stratified\": generates predictions by respecting the training\n set's class distribution.\n * \"most_frequent\": always predicts the most frequent label in the\n training set.\n * \"prior\": always predicts the class that maximizes the class prior\n (like \"most_frequent\") and ``predict_proba`` returns the class prior.\n * \"uniform\": generates predictions uniformly at random.\n * \"constant\": always predicts a constant label that is provided by\n the user. This is useful for metrics that evaluate a non-majority\n class\n\n .. versionchanged:: 0.24\n The default value of `strategy` has changed to \"prior\" in version\n 0.24.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness to generate the predictions when\n ``strategy='stratified'`` or ``strategy='uniform'``.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nconstant : int or str or array-like of shape (n_outputs,)\n The explicit constant as predicted by the \"constant\" strategy. 
This\n parameter is useful only for the \"constant\" strategy.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,) or list of such arrays\n Class labels for each output.\n\nn_classes_ : int or list of int\n Number of label for each output.\n\nclass_prior_ : ndarray of shape (n_classes,) or list of such arrays\n Probability of each class for each output.\n\nn_outputs_ : int\n Number of outputs.\n\nsparse_output_ : bool\n True if the array returned from predict is to be in sparse CSC format.\n Is automatically set to True if the input y is passed in sparse format.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.dummy import DummyClassifier\n>>> X = np.array([-1, 1, 1, 1])\n>>> y = np.array([0, 1, 1, 1])\n>>> dummy_clf = DummyClassifier(strategy=\"most_frequent\")\n>>> dummy_clf.fit(X, y)\nDummyClassifier(strategy='most_frequent')\n>>> dummy_clf.predict(X)\narray([1, 1, 1, 1])\n>>> dummy_clf.score(X, y)\n0.75" - }, - { - "name": "DummyRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "strategy", - "type": "Literal[\"mean\", \"median\", \"quantile\", \"constant\"]", - "hasDefault": true, - "default": "\"mean\"", - "limitation": null, - "ignored": false, - "docstring": "Strategy to use to generate predictions. * \"mean\": always predicts the mean of the training set * \"median\": always predicts the median of the training set * \"quantile\": always predicts a specified quantile of the training set, provided with the quantile parameter. * \"constant\": always predicts a constant value that is provided by the user." - }, - { - "name": "constant", - "type": "Union[float, int, ArrayLike]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The explicit constant as predicted by the \"constant\" strategy. This parameter is useful only for the \"constant\" strategy." 
- }, - { - "name": "quantile", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The quantile to predict using the \"quantile\" strategy. A quantile of 0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the maximum." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the random regressor.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nself : object" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data." - }, - { - "name": "return_std", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the standard deviation of posterior prediction. All zeros in this case. .. 
versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test data.\n\nreturn_std : bool, default=False\n Whether to return the standard deviation of posterior prediction.\n All zeros in this case.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Predicted target values for X.\n\ny_std : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Standard deviation of predictive distribution of query points." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Optional[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples. Passing None as test samples gives the same result as passing real test samples, since DummyRegressor operates independently of the sampled observations." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True values for X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the coefficient of determination R^2 of the prediction.\n\nThe coefficient R^2 is defined as (1 - u/v), where u is the residual\nsum of squares ((y_true - y_pred) ** 2).sum() and v is the total\nsum of squares ((y_true - y_true.mean()) ** 2).sum().\nThe best possible score is 1.0 and it can be negative (because the\nmodel can be arbitrarily worse). 
A constant model that always\npredicts the expected value of y, disregarding the input features,\nwould get a R^2 score of 0.0.\n\nParameters\n----------\nX : None or array-like of shape (n_samples, n_features)\n Test samples. Passing None as test samples gives the same result\n as passing real test samples, since DummyRegressor\n operates independently of the sampled observations.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True values for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n R^2 of self.predict(X) wrt. y." - } - ], - "docstring": "DummyRegressor is a regressor that makes predictions using\nsimple rules.\n\nThis regressor is useful as a simple baseline to compare with other\n(real) regressors. Do not use it for real problems.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nstrategy : {\"mean\", \"median\", \"quantile\", \"constant\"}, default=\"mean\"\n Strategy to use to generate predictions.\n\n * \"mean\": always predicts the mean of the training set\n * \"median\": always predicts the median of the training set\n * \"quantile\": always predicts a specified quantile of the training set,\n provided with the quantile parameter.\n * \"constant\": always predicts a constant value that is provided by\n the user.\n\nconstant : int or float or array-like of shape (n_outputs,), default=None\n The explicit constant as predicted by the \"constant\" strategy. This\n parameter is useful only for the \"constant\" strategy.\n\nquantile : float in [0.0, 1.0], default=None\n The quantile to predict using the \"quantile\" strategy. 
A quantile of\n 0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the\n maximum.\n\nAttributes\n----------\nconstant_ : ndarray of shape (1, n_outputs)\n Mean or median or quantile of the training targets or constant value\n given by the user.\n\nn_outputs_ : int\n Number of outputs.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.dummy import DummyRegressor\n>>> X = np.array([1.0, 2.0, 3.0, 4.0])\n>>> y = np.array([2.0, 3.0, 5.0, 10.0])\n>>> dummy_regr = DummyRegressor(strategy=\"mean\")\n>>> dummy_regr.fit(X, y)\nDummyRegressor()\n>>> dummy_regr.predict(X)\narray([5., 5., 5., 5.])\n>>> dummy_regr.score(X, y)\n0.0" - } - ], - "functions": [] - }, - { - "name": "sklearn.exceptions", - "imports": [ - "from utils.deprecation import deprecated" - ], - "classes": [ - { - "name": "NotFittedError", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Exception class to raise if estimator is used before fitting.\n\nThis class inherits from both ValueError and AttributeError to help with\nexception handling and backward compatibility.\n\nExamples\n--------\n>>> from sklearn.svm import LinearSVC\n>>> from sklearn.exceptions import NotFittedError\n>>> try:\n... LinearSVC().predict([[1, 2], [2, 3], [3, 4]])\n... except NotFittedError as e:\n... print(repr(e))\nNotFittedError(\"This LinearSVC instance is not fitted yet. Call 'fit' with\nappropriate arguments before using this estimator.\"...)\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.validation." 
- }, - { - "name": "ChangedBehaviorWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Warning class used to notify the user of any change in the behavior.\n\n.. versionchanged:: 0.18\n Moved from sklearn.base." - }, - { - "name": "ConvergenceWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Custom warning to capture convergence problems\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils." - }, - { - "name": "DataConversionWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Warning used to notify implicit data conversions happening in the code.\n\nThis warning occurs when some input data needs to be converted or\ninterpreted in a way that may not match the user's expectations.\n\nFor example, this warning may occur when the user\n - passes an integer array to a function which expects float input and\n will convert the input\n - requests a non-copying operation, but a copy is required to meet the\n implementation's data-type expectations;\n - passes an input whose shape can be interpreted ambiguously.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.validation." 
- }, - { - "name": "DataDimensionalityWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Custom warning to notify potential issues with data dimensionality.\n\nFor example, in random projection, this warning is raised when the\nnumber of components, which quantifies the dimensionality of the target\nprojection space, is higher than the number of features, which quantifies\nthe dimensionality of the original source space, to imply that the\ndimensionality of the problem will not be reduced.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils." - }, - { - "name": "EfficiencyWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Warning used to notify the user of inefficient computation.\n\nThis warning notifies the user that the efficiency may not be optimal due\nto some reason which may be included as a part of the warning message.\nThis may be subclassed into a more specific Warning class.\n\n.. versionadded:: 0.18" - }, - { - "name": "FitFailedWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Warning class used if there is an error while fitting the estimator.\n\nThis Warning is used in meta estimators GridSearchCV and RandomizedSearchCV\nand the cross-validation helper function cross_val_score to warn when there\nis an error while fitting the estimator.\n\n.. versionchanged:: 0.18\n Moved from sklearn.cross_validation." 
- }, - { - "name": "NonBLASDotWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Warning used when the dot operation does not use BLAS.\n\nThis warning is used to notify the user that BLAS was not used for dot\noperation and hence the efficiency may be affected.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.validation, extends EfficiencyWarning." - }, - { - "name": "SkipTestWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Warning class used to notify the user of a test that was skipped.\n\nFor example, one of the estimator checks requires a pandas import.\nIf the pandas package cannot be imported, the test will be skipped rather\nthan register as a failure." - }, - { - "name": "UndefinedMetricWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Warning used when the metric is invalid\n\n.. versionchanged:: 0.18\n Moved from sklearn.base." 
- }, - { - "name": "PositiveSpectrumWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Warning raised when the eigenvalues of a PSD matrix have issues\n\nThis warning is typically raised by ``_check_psd_eigenvalues`` when the\neigenvalues of a positive semidefinite (PSD) matrix such as a gram matrix\n(kernel) present significant negative eigenvalues, or bad conditioning i.e.\nvery small non-zero eigenvalues compared to the largest eigenvalue.\n\n.. versionadded:: 0.22" - } - ], - "functions": [] - }, - { - "name": "sklearn.isotonic", - "imports": [ - "import numpy as np", - "from scipy import interpolate", - "from scipy.stats import spearmanr", - "import warnings", - "import math", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from base import RegressorMixin", - "from utils import check_array", - "from utils import check_consistent_length", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from _isotonic import _inplace_contiguous_isotonic_regression", - "from _isotonic import _make_unique" - ], - "classes": [ - { - "name": "IsotonicRegression", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "y_min", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Lower bound on the lowest predicted value (the minimum value may still be higher). If not set, defaults to -inf." - }, - { - "name": "y_max", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Upper bound on the highest predicted value (the maximum may still be lower). If not set, defaults to +inf." 
- }, - { - "name": "increasing", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the predictions should be constrained to increase or decrease with `X`. 'auto' will decide based on the Spearman correlation estimate's sign." - }, - { - "name": "out_of_bounds", - "type": "Literal['nan', 'clip', 'raise']", - "hasDefault": true, - "default": "'nan'", - "limitation": null, - "ignored": false, - "docstring": "Handles how `X` values outside of the training domain are handled during prediction. - 'nan', predictions will be NaN. - 'clip', predictions will be set to the value corresponding to the nearest train interval endpoint. - 'raise', a `ValueError` is raised." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_input_data_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_build_f", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Build the f_ interp1d function." - }, - { - "name": "_build_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Build the y_ IsotonicRegression." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. .. versionchanged:: 0.24 Also accepts 2d array with 1 feature." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training target." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights. 
If set to None, all weights will be set to 1 (equal weights)." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples,) or (n_samples, 1)\n Training data.\n\n .. versionchanged:: 0.24\n Also accepts 2d array with 1 feature.\n\ny : array-like of shape (n_samples,)\n Training target.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights. If set to None, all weights will be set to 1 (equal\n weights).\n\nReturns\n-------\nself : object\n Returns an instance of self.\n\nNotes\n-----\nX is stored for future use, as :meth:`transform` needs X to interpolate\nnew input data." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "T", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to transform. .. versionchanged:: 0.24 Also accepts 2d array with 1 feature." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform new data by linear interpolation\n\nParameters\n----------\nT : array-like of shape (n_samples,) or (n_samples, 1)\n Data to transform.\n\n .. versionchanged:: 0.24\n Also accepts 2d array with 1 feature.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n The transformed data" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "T", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to transform." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict new data by linear interpolation.\n\nParameters\n----------\nT : array-like of shape (n_samples,) or (n_samples, 1)\n Data to transform.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n Transformed data." 
- }, - { - "name": "__getstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Pickle-protocol - return state of the estimator. " - }, - { - "name": "__setstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Pickle-protocol - set state of the estimator.\n\nWe need to rebuild the interpolation function." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Isotonic regression model.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\ny_min : float, default=None\n Lower bound on the lowest predicted value (the minimum value may\n still be higher). If not set, defaults to -inf.\n\ny_max : float, default=None\n Upper bound on the highest predicted value (the maximum may still be\n lower). If not set, defaults to +inf.\n\nincreasing : bool or 'auto', default=True\n Determines whether the predictions should be constrained to increase\n or decrease with `X`. 'auto' will decide based on the Spearman\n correlation estimate's sign.\n\nout_of_bounds : {'nan', 'clip', 'raise'}, default='nan'\n Handles how `X` values outside of the training domain are handled\n during prediction.\n\n - 'nan', predictions will be NaN.\n - 'clip', predictions will be set to the value corresponding to\n the nearest train interval endpoint.\n - 'raise', a `ValueError` is raised.\n\nAttributes\n----------\nX_min_ : float\n Minimum value of input array `X_` for left bound.\n\nX_max_ : float\n Maximum value of input array `X_` for right bound.\n\nX_thresholds_ : ndarray of shape (n_thresholds,)\n Unique ascending `X` values used to interpolate\n the y = f(X) monotonic function.\n\n .. 
versionadded:: 0.24\n\ny_thresholds_ : ndarray of shape (n_thresholds,)\n De-duplicated `y` values suitable to interpolate the y = f(X)\n monotonic function.\n\n .. versionadded:: 0.24\n\nf_ : function\n The stepwise interpolating function that covers the input domain ``X``.\n\nincreasing_ : bool\n Inferred value for ``increasing``.\n\nNotes\n-----\nTies are broken using the secondary method from de Leeuw, 1977.\n\nReferences\n----------\nIsotonic Median Regression: A Linear Programming Approach\nNilotpal Chakravarti\nMathematics of Operations Research\nVol. 14, No. 2 (May, 1989), pp. 303-308\n\nIsotone Optimization in R : Pool-Adjacent-Violators\nAlgorithm (PAVA) and Active Set Methods\nde Leeuw, Hornik, Mair\nJournal of Statistical Software 2009\n\nCorrectness of Kruskal's algorithms for monotone regression with ties\nde Leeuw, Psychometrica, 1977\n\nExamples\n--------\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.isotonic import IsotonicRegression\n>>> X, y = make_regression(n_samples=10, n_features=1, random_state=41)\n>>> iso_reg = IsotonicRegression().fit(X, y)\n>>> iso_reg.predict([.1, .2])\narray([1.8628..., 3.7256...])" - } - ], - "functions": [ - { - "name": "check_increasing", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training target." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Determine whether y is monotonically correlated with x.\n\ny is found increasing or decreasing with respect to x based on a Spearman\ncorrelation test.\n\nParameters\n----------\nx : array-like of shape (n_samples,)\n Training data.\n\ny : array-like of shape (n_samples,)\n Training target.\n\nReturns\n-------\nincreasing_bool : boolean\n Whether the relationship is increasing or decreasing.\n\nNotes\n-----\nThe Spearman correlation coefficient is estimated from the data, and the\nsign of the resulting estimate is used as the result.\n\nIn the event that the 95% confidence interval based on Fisher transform\nspans zero, a warning is raised.\n\nReferences\n----------\nFisher transformation. Wikipedia.\nhttps://en.wikipedia.org/wiki/Fisher_transformation" - }, - { - "name": "isotonic_regression", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights on each point of the regression. If None, weight is set to 1 (equal weights)." - }, - { - "name": "y_min", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Lower bound on the lowest predicted value (the minimum value may still be higher). If not set, defaults to -inf." - }, - { - "name": "y_max", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Upper bound on the highest predicted value (the maximum may still be lower). If not set, defaults to +inf." 
- }, - { - "name": "increasing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to compute ``y_`` is increasing (if set to True) or decreasing (if set to False)" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Solve the isotonic regression model.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny : array-like of shape (n_samples,)\n The data.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights on each point of the regression.\n If None, weight is set to 1 (equal weights).\n\ny_min : float, default=None\n Lower bound on the lowest predicted value (the minimum value may\n still be higher). If not set, defaults to -inf.\n\ny_max : float, default=None\n Upper bound on the highest predicted value (the maximum may still be\n lower). If not set, defaults to +inf.\n\nincreasing : bool, default=True\n Whether to compute ``y_`` is increasing (if set to True) or decreasing\n (if set to False)\n\nReturns\n-------\ny_ : list of floats\n Isotonic fit of y.\n\nReferences\n----------\n\"Active set algorithms for isotonic regression; A unifying framework\"\nby Michael J. Best and Nilotpal Chakravarti, section 3." 
- } - ] - }, - { - "name": "sklearn.kernel_approximation", - "imports": [ - "import warnings", - "import numpy as np", - "import scipy.sparse as sp", - "from scipy.linalg import svd", - "from scipy.fft import fft", - "from scipy.fft import ifft", - "from scipy.fftpack import fft", - "from scipy.fftpack import ifft", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_random_state", - "from utils import as_float_array", - "from utils.extmath import safe_sparse_dot", - "from utils.validation import check_is_fitted", - "from metrics.pairwise import pairwise_kernels", - "from metrics.pairwise import KERNEL_PARAMS", - "from utils.validation import check_non_negative", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "PolynomialCountSketch", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Parameter of the polynomial kernel whose feature map will be approximated." - }, - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel whose feature map will be approximated." - }, - { - "name": "coef0", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Constant term of the polynomial kernel whose feature map will be approximated." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the output feature space. Usually, n_components should be greater than the number of features in input samples in order to achieve good performance. 
The optimal score / run time balance is typically achieved around n_components = 10 * n_features, but this depends on the specific dataset being used." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for indexHash and bitHash initialization. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples in the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model with X.\n\nInitializes the internal variables. The method needs no information\nabout the distribution of data, so we only care about n_features in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data, where n_samples in the number of samples and n_features is the number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate the feature map approximation for X.\n\nParameters\n----------\nX : {array-like}, shape (n_samples, n_features)\n New data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)" - } - ], - "docstring": "Polynomial kernel approximation via Tensor Sketch.\n\nImplements Tensor Sketch, which approximates the feature map\nof the polynomial kernel::\n\n K(X, Y) = (gamma * + coef0)^degree\n\nby efficiently computing a Count Sketch of the outer product of a\nvector with itself using Fast Fourier Transforms (FFT). Read more in the\n:ref:`User Guide `.\n\n.. versionadded:: 0.24\n\nParameters\n----------\ngamma : float, default=1.0\n Parameter of the polynomial kernel whose feature map\n will be approximated.\n\ndegree : int, default=2\n Degree of the polynomial kernel whose feature map\n will be approximated.\n\ncoef0 : int, default=0\n Constant term of the polynomial kernel whose feature map\n will be approximated.\n\nn_components : int, default=100\n Dimensionality of the output feature space. Usually, n_components\n should be greater than the number of features in input samples in\n order to achieve good performance. The optimal score / run time\n balance is typically achieved around n_components = 10 * n_features,\n but this depends on the specific dataset being used.\n\nrandom_state : int, RandomState instance, default=None\n Determines random number generation for indexHash and bitHash\n initialization. Pass an int for reproducible results across multiple\n function calls. 
See :term:`Glossary `.\n\nAttributes\n----------\nindexHash_ : ndarray of shape (degree, n_features), dtype=int64\n Array of indexes in range [0, n_components) used to represent\n the 2-wise independent hash functions for Count Sketch computation.\n\nbitHash_ : ndarray of shape (degree, n_features), dtype=float32\n Array with random entries in {+1, -1}, used to represent\n the 2-wise independent hash functions for Count Sketch computation.\n\nExamples\n--------\n>>> from sklearn.kernel_approximation import PolynomialCountSketch\n>>> from sklearn.linear_model import SGDClassifier\n>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n>>> y = [0, 0, 1, 1]\n>>> ps = PolynomialCountSketch(degree=3, random_state=1)\n>>> X_features = ps.fit_transform(X)\n>>> clf = SGDClassifier(max_iter=10, tol=1e-3)\n>>> clf.fit(X_features, y)\nSGDClassifier(max_iter=10)\n>>> clf.score(X_features, y)\n1.0" - }, - { - "name": "RBFSampler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Parameter of RBF kernel: exp(-gamma * x^2)" - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of Monte Carlo samples per original feature. Equals the dimensionality of the computed feature space." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the generation of the random weights and random offset when fitting the training data. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples in the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model with X.\n\nSamples random projection according to n_features.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data, where n_samples in the number of samples and n_features is the number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the approximate feature map to X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n New data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)" - } - ], - "docstring": "Approximates feature map of an RBF kernel by Monte Carlo approximation\nof its Fourier transform.\n\nIt implements a variant of Random Kitchen Sinks.[1]\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ngamma : float, default=1.0\n Parameter of RBF kernel: exp(-gamma * x^2)\n\nn_components : int, default=100\n Number of Monte Carlo samples per original feature.\n Equals the dimensionality of the computed feature space.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the generation of the random\n weights and random offset when fitting the training data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nrandom_offset_ : ndarray of shape (n_components,), dtype=float64\n Random offset used to compute the projection in the `n_components`\n dimensions of the feature space.\n\nrandom_weights_ : ndarray of shape (n_features, n_components), dtype=float64\n Random projection directions drawn from the Fourier transform\n of the RBF kernel.\n\n\nExamples\n--------\n>>> from sklearn.kernel_approximation import RBFSampler\n>>> from sklearn.linear_model import SGDClassifier\n>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n>>> y = [0, 0, 1, 1]\n>>> rbf_feature = RBFSampler(gamma=1, random_state=1)\n>>> X_features = rbf_feature.fit_transform(X)\n>>> clf = SGDClassifier(max_iter=5, tol=1e-3)\n>>> clf.fit(X_features, y)\nSGDClassifier(max_iter=5)\n>>> clf.score(X_features, y)\n1.0\n\nNotes\n-----\nSee \"Random Features for Large-Scale Kernel 
Machines\" by A. Rahimi and\nBenjamin Recht.\n\n[1] \"Weighted Sums of Random Kitchen Sinks: Replacing\nminimization with randomization in learning\" by A. Rahimi and\nBenjamin Recht.\n(https://people.eecs.berkeley.edu/~brecht/papers/08.rah.rec.nips.pdf)" - }, - { - "name": "SkewedChi2Sampler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "skewedness", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "\"skewedness\" parameter of the kernel. Needs to be cross-validated." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "number of Monte Carlo samples per original feature. Equals the dimensionality of the computed feature space." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the generation of the random weights and random offset when fitting the training data. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples in the number of samples and n_features is the number of features." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model with X.\n\nSamples random projection according to n_features.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data, where n_samples in the number of samples and n_features is the number of features. All values of X must be strictly greater than \"-skewedness\"." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the approximate feature map to X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n New data, where n_samples in the number of samples\n and n_features is the number of features. All values of X must be\n strictly greater than \"-skewedness\".\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)" - } - ], - "docstring": "Approximates feature map of the \"skewed chi-squared\" kernel by Monte\nCarlo approximation of its Fourier transform.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nskewedness : float, default=1.0\n \"skewedness\" parameter of the kernel. 
Needs to be cross-validated.\n\nn_components : int, default=100\n number of Monte Carlo samples per original feature.\n Equals the dimensionality of the computed feature space.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the generation of the random\n weights and random offset when fitting the training data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nrandom_weights_ : ndarray of shape (n_features, n_components)\n Weight array, sampled from a secant hyperbolic distribution, which will\n be used to linearly transform the log of the data.\n\nrandom_offset_ : ndarray of shape (n_features, n_components)\n Bias term, which will be added to the data. It is uniformly distributed\n between 0 and 2*pi.\n\nExamples\n--------\n>>> from sklearn.kernel_approximation import SkewedChi2Sampler\n>>> from sklearn.linear_model import SGDClassifier\n>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n>>> y = [0, 0, 1, 1]\n>>> chi2_feature = SkewedChi2Sampler(skewedness=.01,\n... n_components=10,\n... random_state=0)\n>>> X_features = chi2_feature.fit_transform(X, y)\n>>> clf = SGDClassifier(max_iter=10, tol=1e-3)\n>>> clf.fit(X_features, y)\nSGDClassifier(max_iter=10)\n>>> clf.score(X_features, y)\n1.0\n\nReferences\n----------\nSee \"Random Fourier Approximations for Skewed Multiplicative Histogram\nKernels\" by Fuxin Li, Catalin Ionescu and Cristian Sminchisescu.\n\nSee Also\n--------\nAdditiveChi2Sampler : A different approach for approximating an additive\n variant of the chi squared kernel.\n\nsklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel." 
- }, - { - "name": "AdditiveChi2Sampler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "sample_steps", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Gives the number of (complex) sampling points." - }, - { - "name": "sample_interval", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sampling interval. Must be specified when sample_steps not in {1,2,3}." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples in the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Set the parameters\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply approximate feature map to X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n\nReturns\n-------\nX_new : {ndarray, sparse matrix}, shape = (n_samples, n_features * (2*sample_steps + 1))\n Whether the return value is an array of sparse matrix depends on\n the type of the input X." 
- }, - { - "name": "_transform_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_transform_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Approximate feature map for additive chi2 kernel.\n\nUses sampling the fourier transform of the kernel characteristic\nat regular intervals.\n\nSince the kernel that is to be approximated is additive, the components of\nthe input vectors can be treated separately. Each entry in the original\nspace is transformed into 2*sample_steps+1 features, where sample_steps is\na parameter of the method. Typical values of sample_steps include 1, 2 and\n3.\n\nOptimal choices for the sampling interval for certain data ranges can be\ncomputed (see the reference). The default values should be reasonable.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nsample_steps : int, default=2\n Gives the number of (complex) sampling points.\nsample_interval : float, default=None\n Sampling interval. Must be specified when sample_steps not in {1,2,3}.\n\nAttributes\n----------\nsample_interval_ : float\n Stored sampling interval. 
Specified as a parameter if sample_steps not\n in {1,2,3}.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.linear_model import SGDClassifier\n>>> from sklearn.kernel_approximation import AdditiveChi2Sampler\n>>> X, y = load_digits(return_X_y=True)\n>>> chi2sampler = AdditiveChi2Sampler(sample_steps=2)\n>>> X_transformed = chi2sampler.fit_transform(X, y)\n>>> clf = SGDClassifier(max_iter=5, random_state=0, tol=1e-3)\n>>> clf.fit(X_transformed, y)\nSGDClassifier(max_iter=5, random_state=0)\n>>> clf.score(X_transformed, y)\n0.9499...\n\nNotes\n-----\nThis estimator approximates a slightly different version of the additive\nchi squared kernel then ``metric.additive_chi2`` computes.\n\nSee Also\n--------\nSkewedChi2Sampler : A Fourier-approximation to a non-additive variant of\n the chi squared kernel.\n\nsklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.\n\nsklearn.metrics.pairwise.additive_chi2_kernel : The exact additive chi\n squared kernel.\n\nReferences\n----------\nSee `\"Efficient additive kernels via explicit feature maps\"\n`_\nA. Vedaldi and A. Zisserman, Pattern Analysis and Machine Intelligence,\n2011" - }, - { - "name": "Nystroem", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "Kernel map to be approximated. A callable should accept two arguments and the keyword arguments passed to this object as kernel_params, and should return a floating point number." - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gamma parameter for the RBF, laplacian, polynomial, exponential chi2 and sigmoid kernels. 
Interpretation of the default value is left to the kernel; see the documentation for sklearn.metrics.pairwise. Ignored by other kernels." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Zero coefficient for polynomial and sigmoid kernels. Ignored by other kernels." - }, - { - "name": "degree", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel. Ignored by other kernels." - }, - { - "name": "kernel_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional parameters (keyword arguments) for kernel function passed as callable object." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of features to construct. How many data points will be used to construct the mapping." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the uniform sampling without replacement of n_components of the training data to construct the basis kernel. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This works by breaking down the kernel matrix into n_jobs even slices and computing them in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit estimator to data.\n\nSamples a subset of training points, computes kernel\non these and computes normalization matrix.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to transform." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply feature map to X.\n\nComputes an approximate feature map using the kernel\nbetween some training points and X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to transform.\n\nReturns\n-------\nX_transformed : ndarray of shape (n_samples, n_components)\n Transformed data." - }, - { - "name": "_get_kernel_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Approximate a kernel map using a subset of the training data.\n\nConstructs an approximate feature map for an arbitrary kernel\nusing a subset of the data as basis.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nkernel : string or callable, default='rbf'\n Kernel map to be approximated. 
A callable should accept two arguments\n and the keyword arguments passed to this object as kernel_params, and\n should return a floating point number.\n\ngamma : float, default=None\n Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\n and sigmoid kernels. Interpretation of the default value is left to\n the kernel; see the documentation for sklearn.metrics.pairwise.\n Ignored by other kernels.\n\ncoef0 : float, default=None\n Zero coefficient for polynomial and sigmoid kernels.\n Ignored by other kernels.\n\ndegree : float, default=None\n Degree of the polynomial kernel. Ignored by other kernels.\n\nkernel_params : dict, default=None\n Additional parameters (keyword arguments) for kernel function passed\n as callable object.\n\nn_components : int, default=100\n Number of features to construct.\n How many data points will be used to construct the mapping.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the uniform sampling without\n replacement of n_components of the training data to construct the basis\n kernel.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the kernel matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. 
versionadded:: 0.24\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Subset of training points used to construct the feature map.\n\ncomponent_indices_ : ndarray of shape (n_components)\n Indices of ``components_`` in the training set.\n\nnormalization_ : ndarray of shape (n_components, n_components)\n Normalization matrix needed for embedding.\n Square root of the kernel matrix on ``components_``.\n\nExamples\n--------\n>>> from sklearn import datasets, svm\n>>> from sklearn.kernel_approximation import Nystroem\n>>> X, y = datasets.load_digits(n_class=9, return_X_y=True)\n>>> data = X / 16.\n>>> clf = svm.LinearSVC()\n>>> feature_map_nystroem = Nystroem(gamma=.2,\n... random_state=1,\n... n_components=300)\n>>> data_transformed = feature_map_nystroem.fit_transform(data)\n>>> clf.fit(data_transformed, y)\nLinearSVC()\n>>> clf.score(data_transformed, y)\n0.9987...\n\nReferences\n----------\n* Williams, C.K.I. and Seeger, M.\n \"Using the Nystroem method to speed up kernel machines\",\n Advances in neural information processing systems 2001\n\n* T. Yang, Y. Li, M. Mahdavi, R. Jin and Z. Zhou\n \"Nystroem Method vs Random Fourier Features: A Theoretical and Empirical\n Comparison\",\n Advances in Neural Information Processing Systems 2012\n\n\nSee Also\n--------\nRBFSampler : An approximation to the RBF kernel using random Fourier\n features.\n\nsklearn.metrics.pairwise.kernel_metrics : List of built-in kernels." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.kernel_ridge", - "imports": [ - "import numpy as np", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from base import MultiOutputMixin", - "from metrics.pairwise import pairwise_kernels", - "from linear_model._ridge import _solve_cholesky_kernel", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from utils.deprecation import deprecated" - ], - "classes": [ - { - "name": "KernelRidge", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "Union[ArrayLike, float]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Regularization strength; must be a positive float. Regularization improves the conditioning of the problem and reduces the variance of the estimates. Larger values specify stronger regularization. Alpha corresponds to ``1 / (2C)`` in other linear models such as :class:`~sklearn.linear_model.LogisticRegression` or :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are assumed to be specific to the targets. Hence they must correspond in number. See :ref:`ridge_regression` for formula." - }, - { - "name": "kernel", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "\"linear\"", - "limitation": null, - "ignored": false, - "docstring": "Kernel mapping used internally. This parameter is directly passed to :class:`~sklearn.metrics.pairwise.pairwise_kernel`. If `kernel` is a string, it must be one of the metrics in `pairwise.PAIRWISE_KERNEL_FUNCTIONS`. If `kernel` is \"precomputed\", X is assumed to be a kernel matrix. Alternatively, if `kernel` is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. 
The callable should take two rows from X as input and return the corresponding kernel value as a single number. This means that callables from :mod:`sklearn.metrics.pairwise` are not allowed, as they operate on matrices, not single samples. Use the string identifying the kernel instead." - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gamma parameter for the RBF, laplacian, polynomial, exponential chi2 and sigmoid kernels. Interpretation of the default value is left to the kernel; see the documentation for sklearn.metrics.pairwise. Ignored by other kernels." - }, - { - "name": "degree", - "type": "float", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel. Ignored by other kernels." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Zero coefficient for polynomial and sigmoid kernels. Ignored by other kernels." - }, - { - "name": "kernel_params", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional parameters (keyword arguments) for kernel function passed as callable object." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. If kernel == \"precomputed\" this is instead a precomputed kernel matrix, of shape (n_samples, n_samples)." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "sample_weight", - "type": "Union[ArrayLike, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample, ignored if None is passed." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Kernel Ridge regression model\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. If kernel == \"precomputed\" this is instead\n a precomputed kernel matrix, of shape (n_samples, n_samples).\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\nsample_weight : float or array-like of shape (n_samples,), default=None\n Individual weights for each sample, ignored if None is passed.\n\nReturns\n-------\nself : returns an instance of self." 
- }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples. If kernel == \"precomputed\" this is instead a precomputed kernel matrix, shape = [n_samples, n_samples_fitted], where n_samples_fitted is the number of samples used in the fitting for this estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the kernel ridge model\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples. If kernel == \"precomputed\" this is instead a\n precomputed kernel matrix, shape = [n_samples,\n n_samples_fitted], where n_samples_fitted is the number of\n samples used in the fitting for this estimator.\n\nReturns\n-------\nC : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Returns predicted values." - } - ], - "docstring": "Kernel ridge regression.\n\nKernel ridge regression (KRR) combines ridge regression (linear least\nsquares with l2-norm regularization) with the kernel trick. It thus\nlearns a linear function in the space induced by the respective kernel and\nthe data. For non-linear kernels, this corresponds to a non-linear\nfunction in the original space.\n\nThe form of the model learned by KRR is identical to support vector\nregression (SVR). However, different loss functions are used: KRR uses\nsquared error loss while support vector regression uses epsilon-insensitive\nloss, both combined with l2 regularization. In contrast to SVR, fitting a\nKRR model can be done in closed-form and is typically faster for\nmedium-sized datasets. 
On the other hand, the learned model is non-sparse\nand thus slower than SVR, which learns a sparse model for epsilon > 0, at\nprediction-time.\n\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape [n_samples, n_targets]).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float or array-like of shape (n_targets,), default=1.0\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n assumed to be specific to the targets. Hence they must correspond in\n number. See :ref:`ridge_regression` for formula.\n\nkernel : string or callable, default=\"linear\"\n Kernel mapping used internally. This parameter is directly passed to\n :class:`~sklearn.metrics.pairwise.pairwise_kernel`.\n If `kernel` is a string, it must be one of the metrics\n in `pairwise.PAIRWISE_KERNEL_FUNCTIONS`.\n If `kernel` is \"precomputed\", X is assumed to be a kernel matrix.\n Alternatively, if `kernel` is a callable function, it is called on\n each pair of instances (rows) and the resulting value recorded. The\n callable should take two rows from X as input and return the\n corresponding kernel value as a single number. This means that\n callables from :mod:`sklearn.metrics.pairwise` are not allowed, as\n they operate on matrices, not single samples. Use the string\n identifying the kernel instead.\n\ngamma : float, default=None\n Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\n and sigmoid kernels. 
Interpretation of the default value is left to\n the kernel; see the documentation for sklearn.metrics.pairwise.\n Ignored by other kernels.\n\ndegree : float, default=3\n Degree of the polynomial kernel. Ignored by other kernels.\n\ncoef0 : float, default=1\n Zero coefficient for polynomial and sigmoid kernels.\n Ignored by other kernels.\n\nkernel_params : mapping of string to any, default=None\n Additional parameters (keyword arguments) for kernel function passed\n as callable object.\n\nAttributes\n----------\ndual_coef_ : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Representation of weight vector(s) in kernel space\n\nX_fit_ : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data, which is also required for prediction. If\n kernel == \"precomputed\" this is instead the precomputed\n training matrix, of shape (n_samples, n_samples).\n\nReferences\n----------\n* Kevin P. Murphy\n \"Machine Learning: A Probabilistic Perspective\", The MIT Press\n chapter 14.4.3, pp. 
492-493\n\nSee Also\n--------\nsklearn.linear_model.Ridge : Linear ridge regression.\nsklearn.svm.SVR : Support Vector Regression implemented using libsvm.\n\nExamples\n--------\n>>> from sklearn.kernel_ridge import KernelRidge\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> clf = KernelRidge(alpha=1.0)\n>>> clf.fit(X, y)\nKernelRidge(alpha=1.0)" - } - ], - "functions": [] - }, - { - "name": "sklearn.multiclass", - "imports": [ - "import array", - "import numpy as np", - "import warnings", - "import scipy.sparse as sp", - "import itertools", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from base import clone", - "from base import is_classifier", - "from base import MultiOutputMixin", - "from base import MetaEstimatorMixin", - "from base import is_regressor", - "from base import _is_pairwise", - "from preprocessing import LabelBinarizer", - "from metrics.pairwise import euclidean_distances", - "from utils import check_random_state", - "from utils.deprecation import deprecated", - "from utils._tags import _safe_tags", - "from utils.validation import _num_samples", - "from utils.validation import check_is_fitted", - "from utils.validation import check_X_y", - "from utils.validation import check_array", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import _check_partial_fit_first_call", - "from utils.multiclass import check_classification_targets", - "from utils.multiclass import _ovr_decision_function", - "from utils.metaestimators import _safe_split", - "from utils.metaestimators import if_delegate_has_method", - "from utils.fixes import delayed", - "from exceptions import NotFittedError", - "from joblib import Parallel" - ], - "classes": [ - { - "name": "_ConstantPredictor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "OneVsRestClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator object implementing :term:`fit` and one of :term:`decision_function` or :term:`predict_proba`." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation: the `n_classes` one-vs-rest problems are computed in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionchanged:: v0.20 `n_jobs` default changed from 1 to None" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multi-class targets. An indicator matrix turns on multilabel classification." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Multi-class targets. An indicator matrix turns on multilabel\n classification.\n\nReturns\n-------\nself" - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multi-class targets. An indicator matrix turns on multilabel classification." - }, - { - "name": "classes", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Classes across all calls to partial_fit. Can be obtained via `np.unique(y_all)`, where y_all is the target vector of the entire dataset. This argument is only required in the first call of partial_fit and can be omitted in the subsequent calls." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Partially fit underlying estimators\n\nShould be used when memory is inefficient to train all data.\nChunks of data can be passed in several iteration.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Multi-class targets. 
An indicator matrix turns on multilabel\n classification.\n\nclasses : array, shape (n_classes, )\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is only required in the first call of partial_fit\n and can be omitted in the subsequent calls.\n\nReturns\n-------\nself" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Predict multi-class targets using underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\nReturns\n-------\ny : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Predicted multi-class targets." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Probability estimates.\n\nThe returned estimates for all classes are ordered by label of classes.\n\nNote that in the multilabel case, each sample can have any number of\nlabels. This returns the marginal probability that the given sample has\nthe label in question. For example, it is entirely consistent that two\nlabels both have a 90% probability of applying to a given sample.\n\nIn the single label multiclass case, the rows of the returned matrix\nsum to 1.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nT : (sparse) array-like of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in the model,\n where classes are ordered as they are in `self.classes_`." 
- }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the distance of each sample from the decision boundary for\neach class. This can only be used with estimators which implement the\ndecision_function method.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nT : array-like of shape (n_samples, n_classes) or (n_samples,) for binary classification.\n\n .. versionchanged:: 0.19\n output shape changed to ``(n_samples,)`` to conform to\n scikit-learn conventions for binary classification." - }, - { - "name": "multilabel_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Whether this is a multilabel classifier" - }, - { - "name": "n_classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "intercept_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Indicate if wrapped estimator is using a precomputed Gram matrix" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Indicate if wrapped estimator is using a precomputed Gram matrix" - }, - { - "name": "_first_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "n_features_in_", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "One-vs-the-rest (OvR) multiclass strategy.\n\nAlso known as one-vs-all, this strategy consists in fitting one classifier\nper class. For each classifier, the class is fitted against all the other\nclasses. In addition to its computational efficiency (only `n_classes`\nclassifiers are needed), one advantage of this approach is its\ninterpretability. Since each class is represented by one and one classifier\nonly, it is possible to gain knowledge about the class by inspecting its\ncorresponding classifier. This is the most commonly used strategy for\nmulticlass classification and is a fair default choice.\n\nOneVsRestClassifier can also be used for multilabel classification. To use\nthis feature, provide an indicator matrix for the target `y` when calling\n`.fit`. In other words, the target labels should be formatted as a 2D\nbinary (0/1) matrix, where [i, j] == 1 indicates the presence of label j\nin sample i. This estimator uses the binary relevance method to perform\nmultilabel classification, which involves training one binary classifier\nindependently for each label.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit` and one of\n :term:`decision_function` or :term:`predict_proba`.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation: the `n_classes`\n one-vs-rest problems are computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. 
versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\nAttributes\n----------\nestimators_ : list of `n_classes` estimators\n Estimators used for predictions.\n\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n Coefficient of the features in the decision function. This attribute\n exists only if the ``estimators_`` defines ``coef_``.\n\n .. deprecated:: 0.24\n This attribute is deprecated in 0.24 and will\n be removed in 1.1 (renaming of 0.26). If you use this attribute\n in :class:`~sklearn.feature_selection.RFE` or\n :class:`~sklearn.feature_selection.SelectFromModel`,\n you may pass a callable to the `importance_getter`\n parameter that extracts feature the importances\n from `estimators_`.\n\nintercept_ : ndarray of shape (1, 1) or (n_classes, 1)\n If ``y`` is binary, the shape is ``(1, 1)`` else ``(n_classes, 1)``\n This attribute exists only if the ``estimators_`` defines\n ``intercept_``.\n\n .. deprecated:: 0.24\n This attribute is deprecated in 0.24 and will\n be removed in 1.1 (renaming of 0.26). If you use this attribute\n in :class:`~sklearn.feature_selection.RFE` or\n :class:`~sklearn.feature_selection.SelectFromModel`,\n you may pass a callable to the `importance_getter`\n parameter that extracts feature the importances\n from `estimators_`.\n\nclasses_ : array, shape = [`n_classes`]\n Class labels.\n\nn_classes_ : int\n Number of classes.\n\nlabel_binarizer_ : LabelBinarizer object\n Object used to transform multiclass labels to binary labels and\n vice-versa.\n\nmultilabel_ : boolean\n Whether a OneVsRestClassifier is a multilabel classifier.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.multiclass import OneVsRestClassifier\n>>> from sklearn.svm import SVC\n>>> X = np.array([\n... [10, 10],\n... [8, 10],\n... [-5, 5.5],\n... [-5.4, 5.5],\n... [-20, -20],\n... [-15, -20]\n... 
])\n>>> y = np.array([0, 0, 1, 1, 2, 2])\n>>> clf = OneVsRestClassifier(SVC()).fit(X, y)\n>>> clf.predict([[-19, -20], [9, 9], [-5, 5]])\narray([2, 0, 1])\n\nSee Also\n--------\nsklearn.multioutput.MultiOutputClassifier : Alternate way of extending an\n estimator for multilabel classification.\nsklearn.preprocessing.MultiLabelBinarizer : Transform iterable of iterables\n to binary indicator matrix." - }, - { - "name": "OneVsOneClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator object implementing :term:`fit` and one of :term:`decision_function` or :term:`predict_proba`." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation: the `n_classes * ( n_classes - 1) / 2` OVO problems are computed in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multi-class targets." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : array-like of shape (n_samples,)\n Multi-class targets.\n\nReturns\n-------\nself" - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multi-class targets." - }, - { - "name": "classes", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Classes across all calls to partial_fit. Can be obtained via `np.unique(y_all)`, where y_all is the target vector of the entire dataset. This argument is only required in the first call of partial_fit and can be omitted in the subsequent calls." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Partially fit underlying estimators\n\nShould be used when memory is inefficient to train all data. 
Chunks\nof data can be passed in several iteration, where the first call\nshould have an array of all target variables.\n\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : array-like of shape (n_samples,)\n Multi-class targets.\n\nclasses : array, shape (n_classes, )\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is only required in the first call of partial_fit\n and can be omitted in the subsequent calls.\n\nReturns\n-------\nself" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Estimate the best class label for each sample in X.\n\nThis is implemented as ``argmax(decision_function(X), axis=1)`` which\nwill return the label of the class with most votes by estimators\npredicting the outcome of a decision for each possible class pair.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\nReturns\n-------\ny : numpy array of shape [n_samples]\n Predicted multi-class targets." 
- }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decision function for the OneVsOneClassifier.\n\nThe decision values for the samples are computed by adding the\nnormalized sum of pair-wise classification confidence levels to the\nvotes in order to disambiguate between the decision values when the\nvotes for all the classes are equal leading to a tie.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nY : array-like of shape (n_samples, n_classes) or (n_samples,) for binary classification.\n\n .. versionchanged:: 0.19\n output shape changed to ``(n_samples,)`` to conform to\n scikit-learn conventions for binary classification." - }, - { - "name": "n_classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Indicate if wrapped estimator is using a precomputed Gram matrix" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Indicate if wrapped estimator is using a precomputed Gram matrix" - } - ], - "docstring": "One-vs-one multiclass strategy\n\nThis strategy consists in fitting one classifier per class pair.\nAt prediction time, the class which received the most votes is selected.\nSince it requires to fit `n_classes * (n_classes - 1) / 2` classifiers,\nthis method is usually slower than one-vs-the-rest, due to its\nO(n_classes^2) complexity. However, this method may be advantageous for\nalgorithms such as kernel algorithms which don't scale well with\n`n_samples`. 
This is because each individual learning problem only involves\na small subset of the data whereas, with one-vs-the-rest, the complete\ndataset is used `n_classes` times.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit` and one of\n :term:`decision_function` or :term:`predict_proba`.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation: the `n_classes * (\n n_classes - 1) / 2` OVO problems are computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nestimators_ : list of ``n_classes * (n_classes - 1) / 2`` estimators\n Estimators used for predictions.\n\nclasses_ : numpy array of shape [n_classes]\n Array containing labels.\n\nn_classes_ : int\n Number of classes\n\npairwise_indices_ : list, length = ``len(estimators_)``, or ``None``\n Indices of samples used when training the estimators.\n ``None`` when ``estimator``'s `pairwise` tag is False.\n\n .. deprecated:: 0.24\n\n The _pairwise attribute is deprecated in 0.24. From 1.1\n (renaming of 0.25) and onward, `pairwise_indices_` will use the\n pairwise estimator tag instead.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.multiclass import OneVsOneClassifier\n>>> from sklearn.svm import LinearSVC\n>>> X, y = load_iris(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, test_size=0.33, shuffle=True, random_state=0)\n>>> clf = OneVsOneClassifier(\n... 
LinearSVC(random_state=0)).fit(X_train, y_train)\n>>> clf.predict(X_test[:10])\narray([2, 1, 0, 2, 0, 2, 0, 1, 1, 1])" - }, - { - "name": "OutputCodeClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator object implementing :term:`fit` and one of :term:`decision_function` or :term:`predict_proba`." - }, - { - "name": "code_size", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Percentage of the number of classes to be used to create the code book. A number between 0 and 1 will require fewer classifiers than one-vs-the-rest. A number greater than 1 will require more classifiers than one-vs-the-rest." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The generator used to initialize the codebook. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation: the multiclass problems are computed in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multi-class targets." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : numpy array of shape [n_samples]\n Multi-class targets.\n\nReturns\n-------\nself" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Predict multi-class targets using underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\nReturns\n-------\ny : numpy array of shape [n_samples]\n Predicted multi-class targets." - } - ], - "docstring": "(Error-Correcting) Output-Code multiclass strategy\n\nOutput-code based strategies consist in representing each class with a\nbinary code (an array of 0s and 1s). At fitting time, one binary\nclassifier per bit in the code book is fitted. At prediction time, the\nclassifiers are used to project new points in the class space and the class\nclosest to the points is chosen. The main advantage of these strategies is\nthat the number of classifiers used can be controlled by the user, either\nfor compressing the model (0 < code_size < 1) or for making the model more\nrobust to errors (code_size > 1). 
See the documentation for more details.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit` and one of\n :term:`decision_function` or :term:`predict_proba`.\n\ncode_size : float\n Percentage of the number of classes to be used to create the code book.\n A number between 0 and 1 will require fewer classifiers than\n one-vs-the-rest. A number greater than 1 will require more classifiers\n than one-vs-the-rest.\n\nrandom_state : int, RandomState instance, default=None\n The generator used to initialize the codebook.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation: the multiclass problems\n are computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nestimators_ : list of `int(n_classes * code_size)` estimators\n Estimators used for predictions.\n\nclasses_ : numpy array of shape [n_classes]\n Array containing labels.\n\ncode_book_ : numpy array of shape [n_classes, code_size]\n Binary array containing the code of each class.\n\nExamples\n--------\n>>> from sklearn.multiclass import OutputCodeClassifier\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=100, n_features=4,\n... n_informative=2, n_redundant=0,\n... random_state=0, shuffle=False)\n>>> clf = OutputCodeClassifier(\n... estimator=RandomForestClassifier(random_state=0),\n... random_state=0).fit(X, y)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])\n\nReferences\n----------\n\n.. [1] \"Solving multiclass learning problems via error-correcting output\n codes\",\n Dietterich T., Bakiri G.,\n Journal of Artificial Intelligence Research 2,\n 1995.\n\n.. 
[2] \"The error coding method and PICTs\",\n James G., Hastie T.,\n Journal of Computational and Graphical statistics 7,\n 1998.\n\n.. [3] \"The Elements of Statistical Learning\",\n Hastie T., Tibshirani R., Friedman J., page 606 (second-edition)\n 2008." - } - ], - "functions": [ - { - "name": "_fit_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit a single binary estimator." - }, - { - "name": "_partial_fit_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Partially fit a single binary estimator." - }, - { - "name": "_predict_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make predictions using a single binary estimator." - }, - { - "name": "_check_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure that an estimator implements the necessary methods." - }, - { - "name": "_fit_ovo_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit a single binary estimator (one-vs-one)." - }, - { - "name": "_partial_fit_ovo_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Partially fit a single binary estimator(one-vs-one)." 
- } - ] - }, - { - "name": "sklearn.multioutput", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "from joblib import Parallel", - "from abc import ABCMeta", - "from abc import abstractmethod", - "from base import BaseEstimator", - "from base import clone", - "from base import MetaEstimatorMixin", - "from base import RegressorMixin", - "from base import ClassifierMixin", - "from base import is_classifier", - "from model_selection import cross_val_predict", - "from utils import check_array", - "from utils import check_X_y", - "from utils import check_random_state", - "from utils.metaestimators import if_delegate_has_method", - "from utils.validation import check_is_fitted", - "from utils.validation import has_fit_parameter", - "from utils.validation import _check_fit_params", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import check_classification_targets", - "from utils.fixes import delayed" - ], - "classes": [ - { - "name": "_MultiOutputEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multi-output targets." - }, - { - "name": "classes", - "type": "List[NDArray]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Each array is unique classes for one output in str/int Can be obtained by via ``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where y is the target matrix of the entire dataset. 
This argument is required for the first call to partial_fit and can be omitted in the subsequent calls. Note that y doesn't need to contain all labels in `classes`." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Only supported if the underlying regressor supports sample weights." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Incrementally fit the model to data.\nFit a separate model for each output variable.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets.\n\nclasses : list of ndarray of shape (n_outputs,)\n Each array is unique classes for one output in str/int\n Can be obtained by via\n ``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where y is the\n target matrix of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\nReturns\n-------\nself : object" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multi-output targets. An indicator matrix turns on multilabel estimation." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Only supported if the underlying regressor supports sample weights." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``estimator.fit`` method of each step. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to data.\nFit a separate model for each output variable.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets. An indicator matrix turns on multilabel\n estimation.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``estimator.fit`` method of each step.\n\n .. versionadded:: 0.23\n\nReturns\n-------\nself : object" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict multi-output variable using a model\n trained for each target variable.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\nReturns\n-------\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets predicted across multiple predictors.\n Note: Separate models are generated for each predictor." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "MultiOutputRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator object implementing :term:`fit` and :term:`predict`." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported by the passed estimator) will be parallelized for each target. When individual estimators are fast to train or predict, using ``n_jobs > 1`` can result in slower performance due to the parallelism overhead. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all available processes / threads. See :term:`Glossary ` for more details. .. versionchanged:: 0.20 `n_jobs` default changed from 1 to None" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multi-output targets." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. 
Only supported if the underlying regressor supports sample weights." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Incrementally fit the model to data.\nFit a separate model for each output variable.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\nReturns\n-------\nself : object" - } - ], - "docstring": "Multi target regression\n\nThis strategy consists of fitting one regressor per target. This is a\nsimple strategy for extending regressors that do not natively support\nmulti-target regression.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit` and :term:`predict`.\n\nn_jobs : int or None, optional (default=None)\n The number of jobs to run in parallel.\n :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\n by the passed estimator) will be parallelized for each target.\n\n When individual estimators are fast to train or predict,\n using ``n_jobs > 1`` can result in slower performance due\n to the parallelism overhead.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all available processes / threads.\n See :term:`Glossary ` for more details.\n\n .. 
versionchanged:: 0.20\n `n_jobs` default changed from 1 to None\n\nAttributes\n----------\nestimators_ : list of ``n_output`` estimators\n Estimators used for predictions.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import load_linnerud\n>>> from sklearn.multioutput import MultiOutputRegressor\n>>> from sklearn.linear_model import Ridge\n>>> X, y = load_linnerud(return_X_y=True)\n>>> clf = MultiOutputRegressor(Ridge(random_state=123)).fit(X, y)\n>>> clf.predict(X[[0]])\narray([[176..., 35..., 57...]])" - }, - { - "name": "MultiOutputClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator object implementing :term:`fit`, :term:`score` and :term:`predict_proba`." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported by the passed estimator) will be parallelized for each target. When individual estimators are fast to train or predict, using ``n_jobs > 1`` can result in slower performance due to the parallelism overhead. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all available processes / threads. See :term:`Glossary ` for more details. .. versionchanged:: 0.20 `n_jobs` default changed from 1 to None" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." 
- }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Only supported if the underlying classifier supports sample weights." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``estimator.fit`` method of each step. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\nY : array-like of shape (n_samples, n_classes)\n The target values.\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying classifier supports sample\n weights.\n**fit_params : dict of string -> object\n Parameters passed to the ``estimator.fit`` method of each step.\n\n .. 
versionadded:: 0.23\n\nReturns\n-------\nself : object" - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Probability estimates.\nReturns prediction probabilities for each class of each output.\n\nThis method will raise a ``ValueError`` if any of the\nestimators do not have ``predict_proba``.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data\n\nReturns\n-------\np : array of shape (n_samples, n_classes), or a list of n_outputs such arrays if n_outputs > 1.\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n\n .. versionchanged:: 0.19\n This function now returns a list of arrays where the length of\n the list is ``n_outputs``, and each array is (``n_samples``,\n ``n_classes``) for that particular output." 
- }, - { - "name": "_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples" - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True values for X" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the mean accuracy on the given test data and labels.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples\n\ny : array-like of shape (n_samples, n_outputs)\n True values for X\n\nReturns\n-------\nscores : float\n accuracy_score of self.predict(X) versus y" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Multi target classification\n\nThis strategy consists of fitting one classifier per target. This is a\nsimple strategy for extending classifiers that do not natively support\nmulti-target classification\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit`, :term:`score` and\n :term:`predict_proba`.\n\nn_jobs : int or None, optional (default=None)\n The number of jobs to run in parallel.\n :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\n by the passed estimator) will be parallelized for each target.\n\n When individual estimators are fast to train or predict,\n using ``n_jobs > 1`` can result in slower performance due\n to the parallelism overhead.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all available processes / threads.\n See :term:`Glossary ` for more details.\n\n .. 
versionchanged:: 0.20\n `n_jobs` default changed from 1 to None\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n Class labels.\n\nestimators_ : list of ``n_output`` estimators\n Estimators used for predictions.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_multilabel_classification\n>>> from sklearn.multioutput import MultiOutputClassifier\n>>> from sklearn.neighbors import KNeighborsClassifier\n\n>>> X, y = make_multilabel_classification(n_classes=3, random_state=0)\n>>> clf = MultiOutputClassifier(KNeighborsClassifier()).fit(X, y)\n>>> clf.predict(X[-2:])\narray([[1, 1, 0], [1, 1, 1]])" - }, - { - "name": "_BaseChain", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the `fit` method of each step. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\nY : array-like of shape (n_samples, n_classes)\n The target values.\n**fit_params : dict of string -> object\n Parameters passed to the `fit` method of each step.\n\n .. 
versionadded:: 0.23\n\nReturns\n-------\nself : object" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict on the data matrix X using the ClassifierChain model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\nY_pred : array-like of shape (n_samples, n_classes)\n The predicted values." - } - ], - "docstring": null - }, - { - "name": "ClassifierChain", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base estimator from which the classifier chain is built." - }, - { - "name": "order", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If None, the order will be determined by the order of columns in the label matrix Y.:: order = [0, 1, 2, ..., Y.shape[1] - 1] The order of the chain can be explicitly set by providing a list of integers. For example, for a chain of length 5.:: order = [1, 3, 2, 4, 0] means that the first model in the chain will make predictions for column 1 in the Y matrix, the second model will make predictions for column 3, etc. If order is 'random' a random ordering will be used." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines whether to use cross validated predictions or true labels for the results of previous estimators in the chain. 
Possible inputs for cv are: - None, to use true labels when fitting, - integer, to specify the number of folds in a (Stratified)KFold, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If ``order='random'``, determines random number generation for the chain order. In addition, it controls the random seed given at each `base_estimator` at each chaining iteration. Thus, it is only used when `base_estimator` exposes a `random_state`. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\nY : array-like of shape (n_samples, n_classes)\n The target values.\n\nReturns\n-------\nself : object" - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict probability estimates.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n\nReturns\n-------\nY_prob : array-like of shape (n_samples, n_classes)" - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate the decision_function of the models in the chain.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nY_decision : array-like of shape (n_samples, n_classes)\n Returns the decision function of the sample for each model\n in the chain." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A multi-label model that arranges binary classifiers into a chain.\n\nEach model makes a prediction in the order specified by the chain using\nall of the available features provided to the model plus the predictions\nof models that are earlier in the chain.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.19\n\nParameters\n----------\nbase_estimator : estimator\n The base estimator from which the classifier chain is built.\n\norder : array-like of shape (n_outputs,) or 'random', default=None\n If None, the order will be determined by the order of columns in\n the label matrix Y.::\n\n order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n The order of the chain can be explicitly set by providing a list of\n integers. For example, for a chain of length 5.::\n\n order = [1, 3, 2, 4, 0]\n\n means that the first model in the chain will make predictions for\n column 1 in the Y matrix, the second model will make predictions\n for column 3, etc.\n\n If order is 'random' a random ordering will be used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines whether to use cross validated predictions or true\n labels for the results of previous estimators in the chain.\n Possible inputs for cv are:\n\n - None, to use true labels when fitting,\n - integer, to specify the number of folds in a (Stratified)KFold,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\nrandom_state : int, RandomState instance or None, optional (default=None)\n If ``order='random'``, determines random number generation for the\n chain order.\n In addition, it controls the random seed given at each `base_estimator`\n at each chaining iteration. 
Thus, it is only used when `base_estimator`\n exposes a `random_state`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nclasses_ : list\n A list of arrays of length ``len(estimators_)`` containing the\n class labels for each estimator in the chain.\n\nestimators_ : list\n A list of clones of base_estimator.\n\norder_ : list\n The order of labels in the classifier chain.\n\nExamples\n--------\n>>> from sklearn.datasets import make_multilabel_classification\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.multioutput import ClassifierChain\n>>> X, Y = make_multilabel_classification(\n... n_samples=12, n_classes=3, random_state=0\n... )\n>>> X_train, X_test, Y_train, Y_test = train_test_split(\n... X, Y, random_state=0\n... )\n>>> base_lr = LogisticRegression(solver='lbfgs', random_state=0)\n>>> chain = ClassifierChain(base_lr, order='random', random_state=0)\n>>> chain.fit(X_train, Y_train).predict(X_test)\narray([[1., 1., 0.],\n [1., 0., 0.],\n [0., 1., 0.]])\n>>> chain.predict_proba(X_test)\narray([[0.8387..., 0.9431..., 0.4576...],\n [0.8878..., 0.3684..., 0.2640...],\n [0.0321..., 0.9935..., 0.0625...]])\n\nSee Also\n--------\nRegressorChain : Equivalent for regression.\nMultioutputClassifier : Classifies each output independently rather than\n chaining.\n\nReferences\n----------\nJesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, \"Classifier\nChains for Multi-label Classification\", 2009." - }, - { - "name": "RegressorChain", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base estimator from which the classifier chain is built." 
- }, - { - "name": "order", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If None, the order will be determined by the order of columns in the label matrix Y.:: order = [0, 1, 2, ..., Y.shape[1] - 1] The order of the chain can be explicitly set by providing a list of integers. For example, for a chain of length 5.:: order = [1, 3, 2, 4, 0] means that the first model in the chain will make predictions for column 1 in the Y matrix, the second model will make predictions for column 3, etc. If order is 'random' a random ordering will be used." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines whether to use cross validated predictions or true labels for the results of previous estimators in the chain. Possible inputs for cv are: - None, to use true labels when fitting, - integer, to specify the number of folds in a (Stratified)KFold, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If ``order='random'``, determines random number generation for the chain order. In addition, it controls the random seed given at each `base_estimator` at each chaining iteration. Thus, it is only used when `base_estimator` exposes a `random_state`. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." 
- }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the `fit` method at each step of the regressor chain. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\nY : array-like of shape (n_samples, n_classes)\n The target values.\n\n**fit_params : dict of string -> object\n Parameters passed to the `fit` method at each step\n of the regressor chain.\n\n .. versionadded:: 0.23\n\nReturns\n-------\nself : object" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A multi-label model that arranges regressions into a chain.\n\nEach model makes a prediction in the order specified by the chain using\nall of the available features provided to the model plus the predictions\nof models that are earlier in the chain.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nbase_estimator : estimator\n The base estimator from which the classifier chain is built.\n\norder : array-like of shape (n_outputs,) or 'random', default=None\n If None, the order will be determined by the order of columns in\n the label matrix Y.::\n\n order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n The order of the chain can be explicitly set by providing a list of\n integers. 
For example, for a chain of length 5.::\n\n order = [1, 3, 2, 4, 0]\n\n means that the first model in the chain will make predictions for\n column 1 in the Y matrix, the second model will make predictions\n for column 3, etc.\n\n If order is 'random' a random ordering will be used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines whether to use cross validated predictions or true\n labels for the results of previous estimators in the chain.\n Possible inputs for cv are:\n\n - None, to use true labels when fitting,\n - integer, to specify the number of folds in a (Stratified)KFold,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\nrandom_state : int, RandomState instance or None, optional (default=None)\n If ``order='random'``, determines random number generation for the\n chain order.\n In addition, it controls the random seed given at each `base_estimator`\n at each chaining iteration. Thus, it is only used when `base_estimator`\n exposes a `random_state`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nestimators_ : list\n A list of clones of base_estimator.\n\norder_ : list\n The order of labels in the classifier chain.\n\nExamples\n--------\n>>> from sklearn.multioutput import RegressorChain\n>>> from sklearn.linear_model import LogisticRegression\n>>> logreg = LogisticRegression(solver='lbfgs',multi_class='multinomial')\n>>> X, Y = [[1, 0], [0, 1], [1, 1]], [[0, 2], [1, 1], [2, 0]]\n>>> chain = RegressorChain(base_estimator=logreg, order=[0, 1]).fit(X, Y)\n>>> chain.predict(X)\narray([[0., 2.],\n [1., 1.],\n [2., 0.]])\n\nSee Also\n--------\nClassifierChain : Equivalent for classification.\nMultioutputRegressor : Learns each output independently rather than\n chaining." 
- } - ], - "functions": [ - { - "name": "_fit_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_partial_fit_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.naive_bayes", - "imports": [ - "import warnings", - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numpy as np", - "from scipy.special import logsumexp", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from preprocessing import binarize", - "from preprocessing import LabelBinarizer", - "from preprocessing import label_binarize", - "from utils import check_X_y", - "from utils import check_array", - "from utils import deprecated", - "from utils.extmath import safe_sparse_dot", - "from utils.multiclass import _check_partial_fit_first_call", - "from utils.validation import check_is_fitted", - "from utils.validation import check_non_negative", - "from utils.validation import column_or_1d", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "_BaseNB", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_joint_log_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the unnormalized posterior log probability of X\n\nI.e. ``log P(c) + log P(x|c)`` for all rows x of X, as an array-like of\nshape (n_classes, n_samples).\n\nInput is passed to _joint_log_likelihood as-is by predict,\npredict_proba and predict_log_proba." 
- }, - { - "name": "_check_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "To be overridden in subclasses with the actual checks." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on an array of test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n Predicted target values for X" - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return log-probability estimates for the test vector X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : array-like of shape (n_samples, n_classes)\n Returns the log-probability of the samples for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return probability estimates for the test vector X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : array-like of shape (n_samples, n_classes)\n Returns the probability of the samples for each class in\n the model. 
The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`." - } - ], - "docstring": "Abstract base class for naive Bayes estimators" - }, - { - "name": "GaussianNB", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "priors", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prior probabilities of the classes. If specified the priors are not adjusted according to the data." - }, - { - "name": "var_smoothing", - "type": "float", - "hasDefault": true, - "default": "1e-9", - "limitation": null, - "ignored": false, - "docstring": "Portion of the largest variance of all features that is added to variances for calculation stability. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted). .. versionadded:: 0.17 Gaussian Naive Bayes supports fitting with *sample_weight*." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Gaussian Naive Bayes according to X, y\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n .. versionadded:: 0.17\n Gaussian Naive Bayes supports fitting with *sample_weight*.\n\nReturns\n-------\nself : object" - }, - { - "name": "_check_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_update_mean_variance", - "decorators": [], - "parameters": [ - { - "name": "n_past", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples represented in old mean and variance. If sample weights were given, this should contain the sum of sample weights represented in old mean and variance." - }, - { - "name": "mu", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Means for Gaussians in original set." - }, - { - "name": "var", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Variances for Gaussians in original set." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted)." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute online update of Gaussian mean and variance.\n\nGiven starting sample count, mean, and variance, a new set of\npoints X, and optionally sample weights, return the updated mean and\nvariance. (NB - each dimension (column) in X is treated as independent\n-- you get variance, not covariance).\n\nCan take scalar mean and variance, or vector mean and variance to\nsimultaneously update a number of independent Gaussians.\n\nSee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\nhttp://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nParameters\n----------\nn_past : int\n Number of samples represented in old mean and variance. If sample\n weights were given, this should contain the sum of sample\n weights represented in old mean and variance.\n\nmu : array-like of shape (number of Gaussians,)\n Means for Gaussians in original set.\n\nvar : array-like of shape (number of Gaussians,)\n Variances for Gaussians in original set.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\ntotal_mu : array-like of shape (number of Gaussians,)\n Updated mean for each Gaussian over the combined set.\n\ntotal_var : array-like of shape (number of Gaussians,)\n Updated variance for each Gaussian over the combined set." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." 
- }, - { - "name": "classes", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of all the classes that can possibly appear in the y vector. Must be provided at the first call to partial_fit, can be omitted in subsequent calls." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted). .. versionadded:: 0.17" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance and numerical stability overhead,\nhence it is better to call partial_fit on chunks of data that are\nas large as possible (as long as fitting in the memory budget) to\nhide the overhead.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nclasses : array-like of shape (n_classes,), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n .. 
versionadded:: 0.17\n\nReturns\n-------\nself : object" - }, - { - "name": "_partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "classes", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of all the classes that can possibly appear in the y vector. Must be provided at the first call to partial_fit, can be omitted in subsequent calls." - }, - { - "name": "_refit", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, act as though this were the first time we called _partial_fit (ie, throw away any past fitting and start over)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted)." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Actual implementation of Gaussian NB fitting.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nclasses : array-like of shape (n_classes,), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\n_refit : bool, default=False\n If true, act as though this were the first time we called\n _partial_fit (ie, throw away any past fitting and start over).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : object" - }, - { - "name": "_joint_log_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Gaussian Naive Bayes (GaussianNB)\n\nCan perform online updates to model parameters via :meth:`partial_fit`.\nFor details on algorithm used to update feature means and variance online,\nsee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npriors : array-like of shape (n_classes,)\n Prior probabilities of the classes. If specified the priors are not\n adjusted according to the data.\n\nvar_smoothing : float, default=1e-9\n Portion of the largest variance of all features that is added to\n variances for calculation stability.\n\n .. 
versionadded:: 0.20\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n number of training samples observed in each class.\n\nclass_prior_ : ndarray of shape (n_classes,)\n probability of each class.\n\nclasses_ : ndarray of shape (n_classes,)\n class labels known to the classifier\n\nepsilon_ : float\n absolute additive value to variances\n\nsigma_ : ndarray of shape (n_classes, n_features)\n variance of each feature per class\n\ntheta_ : ndarray of shape (n_classes, n_features)\n mean of each feature per class\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> Y = np.array([1, 1, 1, 2, 2, 2])\n>>> from sklearn.naive_bayes import GaussianNB\n>>> clf = GaussianNB()\n>>> clf.fit(X, Y)\nGaussianNB()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n>>> clf_pf = GaussianNB()\n>>> clf_pf.partial_fit(X, Y, np.unique(Y))\nGaussianNB()\n>>> print(clf_pf.predict([[-0.8, -1]]))\n[1]" - }, - { - "name": "_BaseDiscreteNB", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_check_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_X_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_update_class_log_prior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_alpha", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - 
"docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "classes", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of all the classes that can possibly appear in the y vector. Must be provided at the first call to partial_fit, can be omitted in subsequent calls." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted)." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance overhead hence it is better to call\npartial_fit on chunks of data that are as large as possible\n(as long as fitting in the memory budget) to hide the overhead.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nclasses : array-like of shape (n_classes), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. 
for unweighted).\n\nReturns\n-------\nself : object" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted)." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Naive Bayes classifier according to X, y\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. 
for unweighted).\n\nReturns\n-------\nself : object" - }, - { - "name": "_init_counters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "intercept_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Abstract base class for naive Bayes on discrete/categorical data\n\nAny estimator based on this class should provide:\n\n__init__\n_joint_log_likelihood(X) as per _BaseNB" - }, - { - "name": "MultinomialNB", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing)." - }, - { - "name": "fit_prior", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to learn class prior probabilities or not. If false, a uniform prior will be used." - }, - { - "name": "class_prior", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prior probabilities of the classes. If specified the priors are not adjusted according to the data." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_count", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Count and smooth feature occurrences." - }, - { - "name": "_update_feature_log_prob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply smoothing to raw counts and recompute log probabilities" - }, - { - "name": "_joint_log_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate the posterior log probability of the samples X" - } - ], - "docstring": "Naive Bayes classifier for multinomial models\n\nThe multinomial Naive Bayes classifier is suitable for classification with\ndiscrete features (e.g., word counts for text classification). The\nmultinomial distribution normally requires integer feature counts. However,\nin practice, fractional counts such as tf-idf may also work.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter\n (0 for no smoothing).\n\nfit_prior : bool, default=True\n Whether to learn class prior probabilities or not.\n If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. If specified the priors are not\n adjusted according to the data.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n Number of samples encountered for each class during fitting. 
This\n value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes, )\n Smoothed empirical log probability for each class.\n\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\ncoef_ : ndarray of shape (n_classes, n_features)\n Mirrors ``feature_log_prob_`` for interpreting `MultinomialNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``coef_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n Number of samples encountered for each (class, feature)\n during fitting. This value is weighted by the sample weight when\n provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n Empirical log probability of features\n given a class, ``P(x_i|y)``.\n\nintercept_ : ndarray of shape (n_classes,)\n Mirrors ``class_log_prior_`` for interpreting `MultinomialNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``intercept_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\nn_features_ : int\n Number of features of each sample.\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import MultinomialNB\n>>> clf = MultinomialNB()\n>>> clf.fit(X, y)\nMultinomialNB()\n>>> print(clf.predict(X[2:3]))\n[3]\n\nNotes\n-----\nFor the rationale behind the names `coef_` and `intercept_`, i.e.\nnaive Bayes as a linear classifier, see J. Rennie et al. (2003),\nTackling the poor assumptions of naive Bayes text classifiers, ICML.\n\nReferences\n----------\nC.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\nInformation Retrieval. Cambridge University Press, pp. 
234-265.\nhttps://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html" - }, - { - "name": "ComplementNB", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing)." - }, - { - "name": "fit_prior", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Only used in edge case with a single class in the training set." - }, - { - "name": "class_prior", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prior probabilities of the classes. Not used." - }, - { - "name": "norm", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not a second normalization of the weights is performed. The default behavior mirrors the implementations found in Mahout and Weka, which do not follow the full algorithm described in Table 9 of the paper." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_count", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Count feature occurrences." - }, - { - "name": "_update_feature_log_prob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply smoothing to raw counts and compute the weights." 
- }, - { - "name": "_joint_log_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate the class scores for the samples in X." - } - ], - "docstring": "The Complement Naive Bayes classifier described in Rennie et al. (2003).\n\nThe Complement Naive Bayes classifier was designed to correct the \"severe\nassumptions\" made by the standard Multinomial Naive Bayes classifier. It is\nparticularly suited for imbalanced data sets.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nalpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).\n\nfit_prior : bool, default=True\n Only used in edge case with a single class in the training set.\n\nclass_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. Not used.\n\nnorm : bool, default=False\n Whether or not a second normalization of the weights is performed. The\n default behavior mirrors the implementations found in Mahout and Weka,\n which do not follow the full algorithm described in Table 9 of the\n paper.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n Number of samples encountered for each class during fitting. This\n value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes,)\n Smoothed empirical log probability for each class. Only used in edge\n case with a single class in the training set.\n\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\ncoef_ : ndarray of shape (n_classes, n_features)\n Mirrors ``feature_log_prob_`` for interpreting `ComplementNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``coef_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\nfeature_all_ : ndarray of shape (n_features,)\n Number of samples encountered for each feature during fitting. 
This\n value is weighted by the sample weight when provided.\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n Number of samples encountered for each (class, feature) during fitting.\n This value is weighted by the sample weight when provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n Empirical weights for class complements.\n\nintercept_ : ndarray of shape (n_classes,)\n Mirrors ``class_log_prior_`` for interpreting `ComplementNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``coef_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\nn_features_ : int\n Number of features of each sample.\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import ComplementNB\n>>> clf = ComplementNB()\n>>> clf.fit(X, y)\nComplementNB()\n>>> print(clf.predict(X[2:3]))\n[3]\n\nReferences\n----------\nRennie, J. D., Shih, L., Teevan, J., & Karger, D. R. (2003).\nTackling the poor assumptions of naive bayes text classifiers. In ICML\n(Vol. 3, pp. 616-623).\nhttps://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf" - }, - { - "name": "BernoulliNB", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing)." - }, - { - "name": "binarize", - "type": "Optional[float]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors." 
- }, - { - "name": "fit_prior", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to learn class prior probabilities or not. If false, a uniform prior will be used." - }, - { - "name": "class_prior", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prior probabilities of the classes. If specified the priors are not adjusted according to the data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_X_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_count", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Count and smooth feature occurrences." - }, - { - "name": "_update_feature_log_prob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply smoothing to raw counts and recompute log probabilities" - }, - { - "name": "_joint_log_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate the posterior log probability of the samples X" - } - ], - "docstring": "Naive Bayes classifier for multivariate Bernoulli models.\n\nLike MultinomialNB, this classifier is suitable for discrete data. 
The\ndifference is that while MultinomialNB works with occurrence counts,\nBernoulliNB is designed for binary/boolean features.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter\n (0 for no smoothing).\n\nbinarize : float or None, default=0.0\n Threshold for binarizing (mapping to booleans) of sample features.\n If None, input is presumed to already consist of binary vectors.\n\nfit_prior : bool, default=True\n Whether to learn class prior probabilities or not.\n If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. If specified the priors are not\n adjusted according to the data.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes)\n Number of samples encountered for each class during fitting. This\n value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes)\n Log probability of each class (smoothed).\n\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\ncoef_ : ndarray of shape (n_classes, n_features)\n Mirrors ``feature_log_prob_`` for interpreting `BernoulliNB`\n as a linear model.\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n Number of samples encountered for each (class, feature)\n during fitting. 
This value is weighted by the sample weight when\n provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n Empirical log probability of features given a class, P(x_i|y).\n\nintercept_ : ndarray of shape (n_classes,)\n Mirrors ``class_log_prior_`` for interpreting `BernoulliNB`\n as a linear model.\n\nn_features_ : int\n Number of features of each sample.\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> Y = np.array([1, 2, 3, 4, 4, 5])\n>>> from sklearn.naive_bayes import BernoulliNB\n>>> clf = BernoulliNB()\n>>> clf.fit(X, Y)\nBernoulliNB()\n>>> print(clf.predict(X[2:3]))\n[3]\n\nReferences\n----------\nC.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\nInformation Retrieval. Cambridge University Press, pp. 234-265.\nhttps://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html\n\nA. McCallum and K. Nigam (1998). A comparison of event models for naive\nBayes text classification. Proc. AAAI/ICML-98 Workshop on Learning for\nText Categorization, pp. 41-48.\n\nV. Metsis, I. Androutsopoulos and G. Paliouras (2006). Spam filtering with\nnaive Bayes -- Which naive Bayes? 3rd Conf. on Email and Anti-Spam (CEAS)." - }, - { - "name": "CategoricalNB", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing)." - }, - { - "name": "fit_prior", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to learn class prior probabilities or not. If false, a uniform prior will be used." 
- }, - { - "name": "class_prior", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prior probabilities of the classes. If specified the priors are not adjusted according to the data." - }, - { - "name": "min_categories", - "type": "Union[ArrayLike, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Minimum number of categories per feature. - integer: Sets the minimum number of categories per feature to `n_categories` for each features. - array-like: shape (n_features,) where `n_categories[i]` holds the minimum number of categories for the ith column of the input. - None (default): Determines the number of categories automatically from the training data. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features. Here, each feature of X is assumed to be from a different categorical distribution. It is further assumed that all categories of each feature are represented by the numbers 0, ..., n - 1, where n refers to the total number of categories for the given feature. This can, for instance, be achieved with the help of OrdinalEncoder." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted)." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Naive Bayes classifier according to X, y\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features. Here, each feature of X is\n assumed to be from a different categorical distribution.\n It is further assumed that all categories of each feature are\n represented by the numbers 0, ..., n - 1, where n refers to the\n total number of categories for the given feature. This can, for\n instance, be achieved with the help of OrdinalEncoder.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : object" - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features. Here, each feature of X is assumed to be from a different categorical distribution. It is further assumed that all categories of each feature are represented by the numbers 0, ..., n - 1, where n refers to the total number of categories for the given feature. This can, for instance, be achieved with the help of OrdinalEncoder." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "classes", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of all the classes that can possibly appear in the y vector. Must be provided at the first call to partial_fit, can be omitted in subsequent calls." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted)." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance overhead hence it is better to call\npartial_fit on chunks of data that are as large as possible\n(as long as fitting in the memory budget) to hide the overhead.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features. Here, each feature of X is\n assumed to be from a different categorical distribution.\n It is further assumed that all categories of each feature are\n represented by the numbers 0, ..., n - 1, where n refers to the\n total number of categories for the given feature. This can, for\n instance, be achieved with the help of OrdinalEncoder.\n\ny : array-like of shape (n_samples)\n Target values.\n\nclasses : array-like of shape (n_classes), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\nsample_weight : array-like of shape (n_samples), default=None\n Weights applied to individual samples (1. 
for unweighted).\n\nReturns\n-------\nself : object" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_X_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_init_counters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_n_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_count", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_update_feature_log_prob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_joint_log_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Naive Bayes classifier for categorical features\n\nThe categorical Naive Bayes classifier is suitable for classification with\ndiscrete features that are categorically distributed. The categories of\neach feature are drawn from a categorical distribution.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter\n (0 for no smoothing).\n\nfit_prior : bool, default=True\n Whether to learn class prior probabilities or not.\n If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. 
If specified the priors are not\n adjusted according to the data.\n\nmin_categories : int or array-like of shape (n_features,), default=None\n Minimum number of categories per feature.\n\n - integer: Sets the minimum number of categories per feature to\n `n_categories` for each features.\n - array-like: shape (n_features,) where `n_categories[i]` holds the\n minimum number of categories for the ith column of the input.\n - None (default): Determines the number of categories automatically\n from the training data.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncategory_count_ : list of arrays of shape (n_features,)\n Holds arrays of shape (n_classes, n_categories of respective feature)\n for each feature. Each array provides the number of samples\n encountered for each class and category of the specific feature.\n\nclass_count_ : ndarray of shape (n_classes,)\n Number of samples encountered for each class during fitting. This\n value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes,)\n Smoothed empirical log probability for each class.\n\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\nfeature_log_prob_ : list of arrays of shape (n_features,)\n Holds arrays of shape (n_classes, n_categories of respective feature)\n for each feature. Each array provides the empirical log probability\n of categories given the respective feature and class, ``P(x_i|y)``.\n\nn_features_ : int\n Number of features of each sample.\n\nn_categories_ : ndarray of shape (n_features,), dtype=np.int64\n Number of categories for each feature. This value is\n inferred from the data or set by the minimum number of categories.\n\n .. 
versionadded:: 0.24\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import CategoricalNB\n>>> clf = CategoricalNB()\n>>> clf.fit(X, y)\nCategoricalNB()\n>>> print(clf.predict(X[2:3]))\n[3]" - } - ], - "functions": [] - }, - { - "name": "sklearn.pipeline", - "imports": [ - "from collections import defaultdict", - "from itertools import islice", - "import numpy as np", - "from scipy import sparse", - "from joblib import Parallel", - "from base import clone", - "from base import TransformerMixin", - "from utils._estimator_html_repr import _VisualBlock", - "from utils.metaestimators import if_delegate_has_method", - "from utils import Bunch", - "from utils import _print_elapsed_time", - "from utils.deprecation import deprecated", - "from utils._tags import _safe_tags", - "from utils.validation import check_memory", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from utils.metaestimators import _BaseComposition" - ], - "classes": [ - { - "name": "Pipeline", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "steps", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of (name, transform) tuples (implementing fit/transform) that are chained, in the order in which they are chained, with the last object an estimator." - }, - { - "name": "memory", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to cache the fitted transformers of the pipeline. By default, no caching is performed. If a string is given, it is the path to the caching directory. Enabling caching triggers a clone of the transformers before fitting. 
Therefore, the transformer instance given to the pipeline cannot be inspected directly. Use the attribute ``named_steps`` or ``steps`` to inspect estimators within the pipeline. Caching the transformers is advantageous when fitting is time consuming." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the time elapsed while fitting each step will be printed as it is completed." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Get parameters for this estimator.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `steps` of the `Pipeline`.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : mapping of string to any\n Parameter names mapped to their values." - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. 
Note that\nyou can directly set the parameters of the estimators contained in\n`steps`.\n\nReturns\n-------\nself" - }, - { - "name": "_validate_steps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate (idx, (name, trans)) tuples from self.steps\n\nWhen filter_passthrough is True, 'passthrough' and None transformers\nare filtered out." - }, - { - "name": "__len__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the length of the Pipeline" - }, - { - "name": "__getitem__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns a sub-pipeline or a single esimtator in the pipeline\n\nIndexing with an integer will return an estimator; using a slice\nreturns another Pipeline instance which copies a slice of this\nPipeline. This copy is shallow: modifying (or fitting) estimators in\nthe sub-pipeline will affect the larger pipeline and vice-versa.\nHowever, replacing a value in `step` will not affect a copy." 
- }, - { - "name": "_estimator_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "named_steps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_final_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_log_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. Must fulfill input requirements of first step of the pipeline." - }, - { - "name": "y", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training targets. Must fulfill label requirements for all steps of the pipeline." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``fit`` method of each step, where each parameter name is prefixed such that parameter ``p`` for step ``s`` has key ``s__p``." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model\n\nFit all the transforms one after the other and transform the\ndata, then fit the transformed data using the final estimator.\n\nParameters\n----------\nX : iterable\n Training data. 
Must fulfill input requirements of first step of the\n pipeline.\n\ny : iterable, default=None\n Training targets. Must fulfill label requirements for all steps of\n the pipeline.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\nReturns\n-------\nself : Pipeline\n This estimator" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. Must fulfill input requirements of first step of the pipeline." - }, - { - "name": "y", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training targets. Must fulfill label requirements for all steps of the pipeline." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``fit`` method of each step, where each parameter name is prefixed such that parameter ``p`` for step ``s`` has key ``s__p``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model and transform with the final estimator\n\nFits all the transforms one after the other and transforms the\ndata, then uses fit_transform on transformed data with the final\nestimator.\n\nParameters\n----------\nX : iterable\n Training data. Must fulfill input requirements of first step of the\n pipeline.\n\ny : iterable, default=None\n Training targets. 
Must fulfill label requirements for all steps of\n the pipeline.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\nReturns\n-------\nXt : array-like of shape (n_samples, n_transformed_features)\n Transformed samples" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline." - }, - { - "name": "**predict_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to the ``predict`` called at the end of all transformations in the pipeline. Note that while this may be used to return uncertainties from some models with return_std or return_cov, uncertainties that are generated by the transformations in the pipeline are not propagated to the final estimator. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply transforms to the data, and predict with the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\n**predict_params : dict of string -> object\n Parameters to the ``predict`` called at the end of all\n transformations in the pipeline. Note that while this may be\n used to return uncertainties from some models with return_std\n or return_cov, uncertainties that are generated by the\n transformations in the pipeline are not propagated to the\n final estimator.\n\n .. 
versionadded:: 0.20\n\nReturns\n-------\ny_pred : array-like" - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. Must fulfill input requirements of first step of the pipeline." - }, - { - "name": "y", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training targets. Must fulfill label requirements for all steps of the pipeline." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``fit`` method of each step, where each parameter name is prefixed such that parameter ``p`` for step ``s`` has key ``s__p``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Applies fit_predict of last step in pipeline after transforms.\n\nApplies fit_transforms of a pipeline to the data, followed by the\nfit_predict method of the final estimator in the pipeline. Valid\nonly if the final estimator implements fit_predict.\n\nParameters\n----------\nX : iterable\n Training data. Must fulfill input requirements of first step of\n the pipeline.\n\ny : iterable, default=None\n Training targets. Must fulfill label requirements for all steps\n of the pipeline.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\nReturns\n-------\ny_pred : array-like" - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply transforms, and predict_proba of the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\ny_proba : array-like of shape (n_samples, n_classes)" - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply transforms, and decision_function of the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\ny_score : array-like of shape (n_samples, n_classes)" - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply transforms, and score_samples of the final estimator.\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\ny_score : ndarray of shape (n_samples,)" - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply transforms, and predict_log_proba of the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\ny_score : array-like of shape (n_samples, n_classes)" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to transform. Must fulfill input requirements of first step of the pipeline." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply transforms, and transform with the final estimator\n\nThis also works where final estimator is ``None``: all prior\ntransformations are applied.\n\nParameters\n----------\nX : iterable\n Data to transform. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\nXt : array-like of shape (n_samples, n_transformed_features)" - }, - { - "name": "_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "Xt", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data samples, where ``n_samples`` is the number of samples and ``n_features`` is the number of features. Must fulfill input requirements of last step of pipeline's ``inverse_transform`` method." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply inverse transformations in reverse order\n\nAll estimators in the pipeline must support ``inverse_transform``.\n\nParameters\n----------\nXt : array-like of shape (n_samples, n_transformed_features)\n Data samples, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features. Must fulfill\n input requirements of last step of pipeline's\n ``inverse_transform`` method.\n\nReturns\n-------\nXt : array-like of shape (n_samples, n_features)" - }, - { - "name": "_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline." - }, - { - "name": "y", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets used for scoring. Must fulfill label requirements for all steps of the pipeline." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, this argument is passed as ``sample_weight`` keyword argument to the ``score`` method of the final estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply transforms, and score with the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\ny : iterable, default=None\n Targets used for scoring. 
Must fulfill label requirements for all\n steps of the pipeline.\n\nsample_weight : array-like, default=None\n If not None, this argument is passed as ``sample_weight`` keyword\n argument to the ``score`` method of the final estimator.\n\nReturns\n-------\nscore : float" - }, - { - "name": "classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sk_visual_block_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Pipeline of transforms with a final estimator.\n\nSequentially apply a list of transforms and a final estimator.\nIntermediate steps of the pipeline must be 'transforms', that is, they\nmust implement fit and transform methods.\nThe final estimator only needs to implement fit.\nThe transformers in the pipeline can be cached using ``memory`` argument.\n\nThe purpose of the pipeline is to assemble several steps that can be\ncross-validated together while setting different parameters.\nFor this, it enables setting parameters of the various steps using their\nnames and the parameter name separated by a '__', as in the example below.\nA step's estimator may be replaced entirely by setting the parameter\nwith its name to another estimator, or a transformer removed by setting\nit to 'passthrough' or ``None``.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.5\n\nParameters\n----------\nsteps : list\n List of (name, transform) tuples (implementing fit/transform) that are\n chained, in the order in which they are chained, with the last object\n an estimator.\n\nmemory : str or object with the joblib.Memory interface, default=None\n Used to cache the fitted transformers of the pipeline. By default,\n no caching is performed. If a string is given, it is the path to\n the caching directory. Enabling caching triggers a clone of\n the transformers before fitting. Therefore, the transformer\n instance given to the pipeline cannot be inspected\n directly. Use the attribute ``named_steps`` or ``steps`` to\n inspect estimators within the pipeline. Caching the\n transformers is advantageous when fitting is time consuming.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each step will be printed as it\n is completed.\n\nAttributes\n----------\nnamed_steps : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n Read-only attribute to access any step parameter by user given name.\n Keys are step names and values are steps parameters.\n\nSee Also\n--------\nmake_pipeline : Convenience function for simplified pipeline construction.\n\nExamples\n--------\n>>> from sklearn.svm import SVC\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.pipeline import Pipeline\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n... 
random_state=0)\n>>> pipe = Pipeline([('scaler', StandardScaler()), ('svc', SVC())])\n>>> # The pipeline can be used as any other estimator\n>>> # and avoids leaking the test set into the train set\n>>> pipe.fit(X_train, y_train)\nPipeline(steps=[('scaler', StandardScaler()), ('svc', SVC())])\n>>> pipe.score(X_test, y_test)\n0.88" - }, - { - "name": "FeatureUnion", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "transformer_list", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of transformer objects to be applied to the data. The first half of each tuple is the name of the transformer. The tranformer can be 'drop' for it to be ignored. .. versionchanged:: 0.22 Deprecated `None` as a transformer in favor of 'drop'." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionchanged:: v0.20 `n_jobs` default changed from 1 to None" - }, - { - "name": "transformer_weights", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multiplicative weights for features per transformer. Keys are transformer names, values the weights. Raises ValueError if key not present in ``transformer_list``." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the time elapsed while fitting each transformer will be printed as it is completed." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Get parameters for this estimator.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `transformer_list` of the\n`FeatureUnion`.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : mapping of string to any\n Parameter names mapped to their values." - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. Note that\nyou can directly set the parameters of the estimators contained in\n`tranformer_list`.\n\nReturns\n-------\nself" - }, - { - "name": "_validate_transformers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_transformer_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate (name, trans, weight) tuples excluding None and\n'drop' transformers." 
- }, - { - "name": "get_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get feature names from all transformers.\n\nReturns\n-------\nfeature_names : list of strings\n Names of the features produced by transform." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, used to fit transformers." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets for supervised learning." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit all transformers using X.\n\nParameters\n----------\nX : iterable or array-like, depending on transformers\n Input data, used to fit transformers.\n\ny : array-like of shape (n_samples, n_outputs), default=None\n Targets for supervised learning.\n\nReturns\n-------\nself : FeatureUnion\n This estimator" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data to be transformed." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets for supervised learning." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit all transformers, transform the data and concatenate results.\n\nParameters\n----------\nX : iterable or array-like, depending on transformers\n Input data to be transformed.\n\ny : array-like of shape (n_samples, n_outputs), default=None\n Targets for supervised learning.\n\nReturns\n-------\nX_t : array-like or sparse matrix of shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers." - }, - { - "name": "_log_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_parallel_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Runs func in parallel on X and y" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data to be transformed." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X separately by each transformer, concatenate results.\n\nParameters\n----------\nX : iterable or array-like, depending on transformers\n Input data to be transformed.\n\nReturns\n-------\nX_t : array-like or sparse matrix of shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers." 
- }, - { - "name": "_hstack", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_update_transformer_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sk_visual_block_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Concatenates results of multiple transformer objects.\n\nThis estimator applies a list of transformer objects in parallel to the\ninput data, then concatenates the results. This is useful to combine\nseveral feature extraction mechanisms into a single transformer.\n\nParameters of the transformers may be set using its name and the parameter\nname separated by a '__'. A transformer may be replaced entirely by\nsetting the parameter with its name to another transformer,\nor removed by setting to 'drop'.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\ntransformer_list : list of (string, transformer) tuples\n List of transformer objects to be applied to the data. The first\n half of each tuple is the name of the transformer. The tranformer can\n be 'drop' for it to be ignored.\n\n .. versionchanged:: 0.22\n Deprecated `None` as a transformer in favor of 'drop'.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. 
versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\ntransformer_weights : dict, default=None\n Multiplicative weights for features per transformer.\n Keys are transformer names, values the weights.\n Raises ValueError if key not present in ``transformer_list``.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\nSee Also\n--------\nmake_union : Convenience function for simplified feature union\n construction.\n\nExamples\n--------\n>>> from sklearn.pipeline import FeatureUnion\n>>> from sklearn.decomposition import PCA, TruncatedSVD\n>>> union = FeatureUnion([(\"pca\", PCA(n_components=1)),\n... (\"svd\", TruncatedSVD(n_components=2))])\n>>> X = [[0., 1., 3], [2., 2., 5]]\n>>> union.fit_transform(X)\narray([[ 1.5 , 3.0..., 0.8...],\n [-1.5 , 5.7..., -0.4...]])" - } - ], - "functions": [ - { - "name": "_name_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate names for estimators." - }, - { - "name": "make_pipeline", - "decorators": [], - "parameters": [ - { - "name": "*steps", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "memory", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to cache the fitted transformers of the pipeline. By default, no caching is performed. If a string is given, it is the path to the caching directory. Enabling caching triggers a clone of the transformers before fitting. Therefore, the transformer instance given to the pipeline cannot be inspected directly. Use the attribute ``named_steps`` or ``steps`` to inspect estimators within the pipeline. Caching the transformers is advantageous when fitting is time consuming." 
- }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the time elapsed while fitting each step will be printed as it is completed." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Construct a Pipeline from the given estimators.\n\nThis is a shorthand for the Pipeline constructor; it does not require, and\ndoes not permit, naming the estimators. Instead, their names will be set\nto the lowercase of their types automatically.\n\nParameters\n----------\n*steps : list of estimators.\n\nmemory : str or object with the joblib.Memory interface, default=None\n Used to cache the fitted transformers of the pipeline. By default,\n no caching is performed. If a string is given, it is the path to\n the caching directory. Enabling caching triggers a clone of\n the transformers before fitting. Therefore, the transformer\n instance given to the pipeline cannot be inspected\n directly. Use the attribute ``named_steps`` or ``steps`` to\n inspect estimators within the pipeline. 
Caching the\n transformers is advantageous when fitting is time consuming.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each step will be printed as it\n is completed.\n\nSee Also\n--------\nPipeline : Class for creating a pipeline of transforms with a final\n estimator.\n\nExamples\n--------\n>>> from sklearn.naive_bayes import GaussianNB\n>>> from sklearn.preprocessing import StandardScaler\n>>> make_pipeline(StandardScaler(), GaussianNB(priors=None))\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('gaussiannb', GaussianNB())])\n\nReturns\n-------\np : Pipeline" - }, - { - "name": "_transform_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_transform_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fits ``transformer`` to ``X`` and ``y``. The transformed result is returned\nwith the fitted transformer. If ``weight`` is not ``None``, the result will\nbe multiplied by ``weight``." - }, - { - "name": "_fit_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fits ``transformer`` to ``X`` and ``y``." - }, - { - "name": "make_union", - "decorators": [], - "parameters": [ - { - "name": "*transformers", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. 
versionchanged:: v0.20 `n_jobs` default changed from 1 to None" - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the time elapsed while fitting each transformer will be printed as it is completed." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Construct a FeatureUnion from the given transformers.\n\nThis is a shorthand for the FeatureUnion constructor; it does not require,\nand does not permit, naming the transformers. Instead, they will be given\nnames automatically based on their types. It also does not allow weighting.\n\nParameters\n----------\n*transformers : list of estimators\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\nReturns\n-------\nf : FeatureUnion\n\nSee Also\n--------\nFeatureUnion : Class for concatenating the results of multiple transformer\n objects.\n\nExamples\n--------\n>>> from sklearn.decomposition import PCA, TruncatedSVD\n>>> from sklearn.pipeline import make_union\n>>> make_union(PCA(), TruncatedSVD())\n FeatureUnion(transformer_list=[('pca', PCA()),\n ('truncatedsvd', TruncatedSVD())])" - } - ] - }, - { - "name": "sklearn.random_projection", - "imports": [ - "import warnings", - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numpy as np", - "import scipy.sparse as sp", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_random_state", - "from utils.extmath import safe_sparse_dot", - "from utils.random import sample_without_replacement", - "from 
utils.validation import check_array", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from exceptions import DataDimensionalityWarning" - ], - "classes": [ - { - "name": "BaseRandomProjection", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_random_matrix", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the target projection space." - }, - { - "name": "n_features", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the original source space." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate the random projection matrix.\n\nParameters\n----------\nn_components : int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\nReturns\n-------\ncomponents : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated random matrix. Sparse matrix will be of CSR format." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training set: only the shape is used to find optimal random matrix dimensions based on the theory referenced in the afore mentioned papers." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Generate a sparse random projection matrix.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training set: only the shape is used to find optimal random\n matrix dimensions based on the theory referenced in the\n afore mentioned papers.\n\ny\n Ignored\n\nReturns\n-------\nself" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data to project into a smaller dimensional space." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Project the data by using matrix product with the random matrix\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input data to project into a smaller dimensional space.\n\nReturns\n-------\nX_new : {ndarray, sparse matrix} of shape (n_samples, n_components)\n Projected array." - } - ], - "docstring": "Base class for random projections.\n\nWarning: This class should not be used directly.\nUse derived classes instead." - }, - { - "name": "GaussianRandomProjection", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "Union[Literal['auto'], int]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the target projection space. n_components can be automatically adjusted according to the number of samples in the dataset and the bound given by the Johnson-Lindenstrauss lemma. In that case the quality of the embedding is controlled by the ``eps`` parameter. 
It should be noted that Johnson-Lindenstrauss lemma can yield very conservative estimated of the required number of components as it makes no assumption on the structure of the dataset." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Parameter to control the quality of the embedding according to the Johnson-Lindenstrauss lemma when `n_components` is set to 'auto'. The value should be strictly positive. Smaller values lead to better embedding and higher number of dimensions (n_components) in the target projection space." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generator used to generate the projection matrix at fit time. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_random_matrix", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the target projection space." - }, - { - "name": "n_features", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the original source space." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate the random projection matrix.\n\nParameters\n----------\nn_components : int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\nReturns\n-------\ncomponents : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated random matrix. Sparse matrix will be of CSR format." 
- } - ], - "docstring": "Reduce dimensionality through Gaussian random projection.\n\nThe components of the random matrix are drawn from N(0, 1 / n_components).\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nn_components : int or 'auto', default='auto'\n Dimensionality of the target projection space.\n\n n_components can be automatically adjusted according to the\n number of samples in the dataset and the bound given by the\n Johnson-Lindenstrauss lemma. In that case the quality of the\n embedding is controlled by the ``eps`` parameter.\n\n It should be noted that Johnson-Lindenstrauss lemma can yield\n very conservative estimated of the required number of components\n as it makes no assumption on the structure of the dataset.\n\neps : float, default=0.1\n Parameter to control the quality of the embedding according to\n the Johnson-Lindenstrauss lemma when `n_components` is set to\n 'auto'. The value should be strictly positive.\n\n Smaller values lead to better embedding and higher number of\n dimensions (n_components) in the target projection space.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the\n projection matrix at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nn_components_ : int\n Concrete number of components computed when n_components=\"auto\".\n\ncomponents_ : ndarray of shape (n_components, n_features)\n Random matrix used for the projection.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.random_projection import GaussianRandomProjection\n>>> rng = np.random.RandomState(42)\n>>> X = rng.rand(100, 10000)\n>>> transformer = GaussianRandomProjection(random_state=rng)\n>>> X_new = transformer.fit_transform(X)\n>>> X_new.shape\n(100, 3947)\n\nSee Also\n--------\nSparseRandomProjection" - }, - { - "name": "SparseRandomProjection", 
- "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "Union[Literal['auto'], int]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the target projection space. n_components can be automatically adjusted according to the number of samples in the dataset and the bound given by the Johnson-Lindenstrauss lemma. In that case the quality of the embedding is controlled by the ``eps`` parameter. It should be noted that Johnson-Lindenstrauss lemma can yield very conservative estimated of the required number of components as it makes no assumption on the structure of the dataset." - }, - { - "name": "density", - "type": "Union[Literal['auto'], float]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Ratio in the range (0, 1] of non-zero component in the random projection matrix. If density = 'auto', the value is set to the minimum density as recommended by Ping Li et al.: 1 / sqrt(n_features). Use density = 1 / 3.0 if you want to reproduce the results from Achlioptas, 2001." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Parameter to control the quality of the embedding according to the Johnson-Lindenstrauss lemma when n_components is set to 'auto'. This value should be strictly positive. Smaller values lead to better embedding and higher number of dimensions (n_components) in the target projection space." - }, - { - "name": "dense_output", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, ensure that the output of the random projection is a dense numpy array even if the input and random projection matrix are both sparse. 
In practice, if the number of components is small the number of zero components in the projected data will be very small and it will be more CPU and memory efficient to use a dense representation. If False, the projected data uses a sparse representation if the input is sparse." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generator used to generate the projection matrix at fit time. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_random_matrix", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the target projection space." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the original source space." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate the random projection matrix\n\nParameters\n----------\nn_components : int\n Dimensionality of the target projection space.\n\nn_features : int\n Dimensionality of the original source space.\n\nReturns\n-------\ncomponents : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated random matrix. Sparse matrix will be of CSR format." 
- } - ], - "docstring": "Reduce dimensionality through sparse random projection.\n\nSparse random matrix is an alternative to dense random\nprojection matrix that guarantees similar embedding quality while being\nmuch more memory efficient and allowing faster computation of the\nprojected data.\n\nIf we note `s = 1 / density` the components of the random matrix are\ndrawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nn_components : int or 'auto', default='auto'\n Dimensionality of the target projection space.\n\n n_components can be automatically adjusted according to the\n number of samples in the dataset and the bound given by the\n Johnson-Lindenstrauss lemma. In that case the quality of the\n embedding is controlled by the ``eps`` parameter.\n\n It should be noted that Johnson-Lindenstrauss lemma can yield\n very conservative estimated of the required number of components\n as it makes no assumption on the structure of the dataset.\n\ndensity : float or 'auto', default='auto'\n Ratio in the range (0, 1] of non-zero component in the random\n projection matrix.\n\n If density = 'auto', the value is set to the minimum density\n as recommended by Ping Li et al.: 1 / sqrt(n_features).\n\n Use density = 1 / 3.0 if you want to reproduce the results from\n Achlioptas, 2001.\n\neps : float, default=0.1\n Parameter to control the quality of the embedding according to\n the Johnson-Lindenstrauss lemma when n_components is set to\n 'auto'. 
This value should be strictly positive.\n\n Smaller values lead to better embedding and higher number of\n dimensions (n_components) in the target projection space.\n\ndense_output : bool, default=False\n If True, ensure that the output of the random projection is a\n dense numpy array even if the input and random projection matrix\n are both sparse. In practice, if the number of components is\n small the number of zero components in the projected data will\n be very small and it will be more CPU and memory efficient to\n use a dense representation.\n\n If False, the projected data uses a sparse representation if\n the input is sparse.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the\n projection matrix at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nn_components_ : int\n Concrete number of components computed when n_components=\"auto\".\n\ncomponents_ : sparse matrix of shape (n_components, n_features)\n Random matrix used for the projection. Sparse matrix will be of CSR\n format.\n\ndensity_ : float in range 0.0 - 1.0\n Concrete density computed from when density = \"auto\".\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.random_projection import SparseRandomProjection\n>>> rng = np.random.RandomState(42)\n>>> X = rng.rand(100, 10000)\n>>> transformer = SparseRandomProjection(random_state=rng)\n>>> X_new = transformer.fit_transform(X)\n>>> X_new.shape\n(100, 3947)\n>>> # very few components are non-zero\n>>> np.mean(transformer.components_ != 0)\n0.0100...\n\nSee Also\n--------\nGaussianRandomProjection\n\nReferences\n----------\n\n.. [1] Ping Li, T. Hastie and K. W. Church, 2006,\n \"Very Sparse Random Projections\".\n https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf\n\n.. [2] D. 
Achlioptas, 2001, \"Database-friendly random projections\",\n https://users.soe.ucsc.edu/~optas/papers/jl.pdf" - } - ], - "functions": [ - { - "name": "johnson_lindenstrauss_min_dim", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "Union[ArrayLike, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples that should be a integer greater than 0. If an array is given, it will compute a safe number of components array-wise." - }, - { - "name": "eps", - "type": "Union[NDArray, float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum distortion rate in the range (0,1 ) as defined by the Johnson-Lindenstrauss lemma. If an array is given, it will compute a safe number of components array-wise." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find a 'safe' number of components to randomly project to.\n\nThe distortion introduced by a random projection `p` only changes the\ndistance between two points by a factor (1 +- eps) in an euclidean space\nwith good probability. 
The projection `p` is an eps-embedding as defined\nby:\n\n (1 - eps) ||u - v||^2 < ||p(u) - p(v)||^2 < (1 + eps) ||u - v||^2\n\nWhere u and v are any rows taken from a dataset of shape (n_samples,\nn_features), eps is in ]0, 1[ and p is a projection by a random Gaussian\nN(0, 1) matrix of shape (n_components, n_features) (or a sparse\nAchlioptas matrix).\n\nThe minimum number of components to guarantee the eps-embedding is\ngiven by:\n\n n_components >= 4 log(n_samples) / (eps^2 / 2 - eps^3 / 3)\n\nNote that the number of dimensions is independent of the original\nnumber of features but instead depends on the size of the dataset:\nthe larger the dataset, the higher is the minimal dimensionality of\nan eps-embedding.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int or array-like of int\n Number of samples that should be a integer greater than 0. If an array\n is given, it will compute a safe number of components array-wise.\n\neps : float or ndarray of shape (n_components,), dtype=float, default=0.1\n Maximum distortion rate in the range (0,1 ) as defined by the\n Johnson-Lindenstrauss lemma. If an array is given, it will compute a\n safe number of components array-wise.\n\nReturns\n-------\nn_components : int or ndarray of int\n The minimal number of components to guarantee with good probability\n an eps-embedding with n_samples.\n\nExamples\n--------\n\n>>> johnson_lindenstrauss_min_dim(1e6, eps=0.5)\n663\n\n>>> johnson_lindenstrauss_min_dim(1e6, eps=[0.5, 0.1, 0.01])\narray([ 663, 11841, 1112658])\n\n>>> johnson_lindenstrauss_min_dim([1e4, 1e5, 1e6], eps=0.1)\narray([ 7894, 9868, 11841])\n\nReferences\n----------\n\n.. [1] https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma\n\n.. 
[2] Sanjoy Dasgupta and Anupam Gupta, 1999,\n \"An elementary proof of the Johnson-Lindenstrauss Lemma.\"\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.45.3654" - }, - { - "name": "_check_density", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Factorize density check according to Li et al." - }, - { - "name": "_check_input_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Factorize argument checking for random matrix generation." - }, - { - "name": "_gaussian_random_matrix", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the target projection space." - }, - { - "name": "n_features", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the original source space." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generator used to generate the matrix at fit time. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a dense Gaussian random matrix.\n\nThe components of the random matrix are drawn from\n\n N(0, 1.0 / n_components).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the matrix\n at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ncomponents : ndarray of shape (n_components, n_features)\n The generated Gaussian random matrix.\n\nSee Also\n--------\nGaussianRandomProjection" - }, - { - "name": "_sparse_random_matrix", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the target projection space." - }, - { - "name": "n_features", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the original source space." - }, - { - "name": "density", - "type": "Union[Literal['auto'], float]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Ratio of non-zero component in the random projection matrix in the range `(0, 1]` If density = 'auto', the value is set to the minimum density as recommended by Ping Li et al.: 1 / sqrt(n_features). Use density = 1 / 3.0 if you want to reproduce the results from Achlioptas, 2001." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generator used to generate the matrix at fit time. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generalized Achlioptas random sparse matrix for random projection.\n\nSetting density to 1 / 3 will yield the original matrix by Dimitris\nAchlioptas while setting a lower value will yield the generalization\nby Ping Li et al.\n\nIf we note :math:`s = 1 / density`, the components of the random matrix are\ndrawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\ndensity : float or 'auto', default='auto'\n Ratio of non-zero component in the random projection matrix in the\n range `(0, 1]`\n\n If density = 'auto', the value is set to the minimum density\n as recommended by Ping Li et al.: 1 / sqrt(n_features).\n\n Use density = 1 / 3.0 if you want to reproduce the results from\n Achlioptas, 2001.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the matrix\n at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ncomponents : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated Gaussian random matrix. Sparse matrix will be of CSR\n format.\n\nSee Also\n--------\nSparseRandomProjection\n\nReferences\n----------\n\n.. [1] Ping Li, T. Hastie and K. W. 
Church, 2006,\n \"Very Sparse Random Projections\".\n https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf\n\n.. [2] D. Achlioptas, 2001, \"Database-friendly random projections\",\n http://www.cs.ucsc.edu/~optas/papers/jl.pdf" - } - ] - }, - { - "name": "sklearn.setup", - "imports": [ - "import sys", - "import os", - "from sklearn._build_utils import cythonize_extensions", - "from numpy.distutils.misc_util import Configuration", - "import numpy", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn._config", - "imports": [ - "import os", - "from contextlib import contextmanager as contextmanager" - ], - "classes": [], - "functions": [ - { - "name": "get_config", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Retrieve current values for configuration set by :func:`set_config`\n\nReturns\n-------\nconfig : dict\n Keys are parameter names that can be passed to :func:`set_config`.\n\nSee Also\n--------\nconfig_context : Context manager for global scikit-learn configuration.\nset_config : Set global scikit-learn configuration." - }, - { - "name": "set_config", - "decorators": [], - "parameters": [ - { - "name": "assume_finite", - "type": "bool", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If True, validation for finiteness will be skipped, saving time, but leading to potential crashes. If False, validation for finiteness will be performed, avoiding error. Global default: False. .. 
versionadded:: 0.19" - }, - { - "name": "working_memory", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If set, scikit-learn will attempt to limit the size of temporary arrays to this number of MiB (per job when parallelised), often saving both computation time and memory on expensive operations that can be performed in chunks. Global default: 1024. .. versionadded:: 0.20" - }, - { - "name": "print_changed_only", - "type": "bool", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If True, only the parameters that were set to non-default values will be printed when printing an estimator. For example, ``print(SVC())`` while True will only print 'SVC()' while the default behaviour would be to print 'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters. .. versionadded:: 0.21" - }, - { - "name": "display", - "type": "Literal['text', 'diagram']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If 'diagram', estimators will be displayed as a diagram in a Jupyter lab or notebook context. If 'text', estimators will be displayed as text. Default is 'text'. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set global scikit-learn configuration\n\n.. versionadded:: 0.19\n\nParameters\n----------\nassume_finite : bool, default=None\n If True, validation for finiteness will be skipped,\n saving time, but leading to potential crashes. If\n False, validation for finiteness will be performed,\n avoiding error. Global default: False.\n\n .. versionadded:: 0.19\n\nworking_memory : int, default=None\n If set, scikit-learn will attempt to limit the size of temporary arrays\n to this number of MiB (per job when parallelised), often saving both\n computation time and memory on expensive operations that can be\n performed in chunks. 
Global default: 1024.\n\n .. versionadded:: 0.20\n\nprint_changed_only : bool, default=None\n If True, only the parameters that were set to non-default\n values will be printed when printing an estimator. For example,\n ``print(SVC())`` while True will only print 'SVC()' while the default\n behaviour would be to print 'SVC(C=1.0, cache_size=200, ...)' with\n all the non-changed parameters.\n\n .. versionadded:: 0.21\n\ndisplay : {'text', 'diagram'}, default=None\n If 'diagram', estimators will be displayed as a diagram in a Jupyter\n lab or notebook context. If 'text', estimators will be displayed as\n text. Default is 'text'.\n\n .. versionadded:: 0.23\n\nSee Also\n--------\nconfig_context : Context manager for global scikit-learn configuration.\nget_config : Retrieve current values of the global configuration." - }, - { - "name": "config_context", - "decorators": [], - "parameters": [ - { - "name": "assume_finite", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, validation for finiteness will be skipped, saving time, but leading to potential crashes. If False, validation for finiteness will be performed, avoiding error. Global default: False." - }, - { - "name": "working_memory", - "type": "int", - "hasDefault": true, - "default": "1024", - "limitation": null, - "ignored": false, - "docstring": "If set, scikit-learn will attempt to limit the size of temporary arrays to this number of MiB (per job when parallelised), often saving both computation time and memory on expensive operations that can be performed in chunks. Global default: 1024." - }, - { - "name": "print_changed_only", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, only the parameters that were set to non-default values will be printed when printing an estimator. 
For example, ``print(SVC())`` while True will only print 'SVC()', but would print 'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters when False. Default is True. .. versionchanged:: 0.23 Default changed from False to True." - }, - { - "name": "display", - "type": "Literal['text', 'diagram']", - "hasDefault": true, - "default": "'text'", - "limitation": null, - "ignored": false, - "docstring": "If 'diagram', estimators will be displayed as a diagram in a Jupyter lab or notebook context. If 'text', estimators will be displayed as text. Default is 'text'. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Context manager for global scikit-learn configuration\n\nParameters\n----------\nassume_finite : bool, default=False\n If True, validation for finiteness will be skipped,\n saving time, but leading to potential crashes. If\n False, validation for finiteness will be performed,\n avoiding error. Global default: False.\n\nworking_memory : int, default=1024\n If set, scikit-learn will attempt to limit the size of temporary arrays\n to this number of MiB (per job when parallelised), often saving both\n computation time and memory on expensive operations that can be\n performed in chunks. Global default: 1024.\n\nprint_changed_only : bool, default=True\n If True, only the parameters that were set to non-default\n values will be printed when printing an estimator. For example,\n ``print(SVC())`` while True will only print 'SVC()', but would print\n 'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters\n when False. Default is True.\n\n .. versionchanged:: 0.23\n Default changed from False to True.\n\ndisplay : {'text', 'diagram'}, default='text'\n If 'diagram', estimators will be displayed as a diagram in a Jupyter\n lab or notebook context. If 'text', estimators will be displayed as\n text. Default is 'text'.\n\n .. 
versionadded:: 0.23\n\nNotes\n-----\nAll settings, not just those presently modified, will be returned to\ntheir previous values when the context manager is exited. This is not\nthread-safe.\n\nExamples\n--------\n>>> import sklearn\n>>> from sklearn.utils.validation import assert_all_finite\n>>> with sklearn.config_context(assume_finite=True):\n... assert_all_finite([float('nan')])\n>>> with sklearn.config_context(assume_finite=True):\n... with sklearn.config_context(assume_finite=False):\n... assert_all_finite([float('nan')])\nTraceback (most recent call last):\n...\nValueError: Input contains NaN, ...\n\nSee Also\n--------\nset_config : Set global scikit-learn configuration.\nget_config : Retrieve current values of the global configuration." - } - ] - }, - { - "name": "sklearn._distributor_init", - "imports": [ - "import os", - "import os.path as op", - "from ctypes import WinDLL" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn._min_dependencies", - "imports": [ - "import platform", - "import argparse" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn", - "imports": [ - "import sys", - "import logging", - "import os", - "import random", - "from _config import get_config", - "from _config import set_config", - "from _config import config_context", - "from None import _distributor_init", - "from None import __check_build", - "from base import clone", - "from utils._show_versions import show_versions", - "import numpy as np" - ], - "classes": [], - "functions": [ - { - "name": "setup_module", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fixture for the tests to assure globally controllable seeding of RNGs" - } - ] - }, - { - "name": "sklearn.cluster.setup", - "imports": [ - "import os", - "import numpy", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": 
"configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster._affinity_propagation", - "imports": [ - "import numpy as np", - "import warnings", - "from exceptions import ConvergenceWarning", - "from base import BaseEstimator", - "from base import ClusterMixin", - "from utils import as_float_array", - "from utils import check_random_state", - "from utils.deprecation import deprecated", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from metrics import euclidean_distances", - "from metrics import pairwise_distances_argmin", - "from _config import config_context" - ], - "classes": [ - { - "name": "AffinityPropagation", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "damping", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Damping factor (between 0.5 and 1) is the extent to which the current value is maintained relative to incoming values (weighted 1 - damping). This in order to avoid numerical oscillations when updating these values (messages)." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations." - }, - { - "name": "convergence_iter", - "type": "int", - "hasDefault": true, - "default": "15", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations with no change in the number of estimated clusters that stops the convergence." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Make a copy of input data." 
- }, - { - "name": "preference", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Preferences for each point - points with larger values of preferences are more likely to be chosen as exemplars. The number of exemplars, ie of clusters, is influenced by the input preferences value. If the preferences are not passed as arguments, they will be set to the median of the input similarities." - }, - { - "name": "affinity", - "type": "Literal['euclidean', 'precomputed']", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "Which affinity to use. At the moment 'precomputed' and ``euclidean`` are supported. 'euclidean' uses the negative squared euclidean distance between points." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to be verbose." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the starting state. Use an int for reproducible results across function calls. See the :term:`Glossary `. .. versionadded:: 0.23 this parameter was previously hardcoded as 0." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster, or similarities / affinities between instances if ``affinity='precomputed'``. If a sparse feature matrix is provided, it will be converted into a sparse ``csr_matrix``." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the clustering from features, or affinity matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. If a sparse feature matrix\n is provided, it will be converted into a sparse ``csr_matrix``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data to predict. If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the closest cluster each sample in X belongs to.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels." - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster, or similarities / affinities between instances if ``affinity='precomputed'``. If a sparse feature matrix is provided, it will be converted into a sparse ``csr_matrix``." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the clustering from features or affinity matrix, and return\ncluster labels.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. If a sparse feature matrix\n is provided, it will be converted into a sparse ``csr_matrix``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels." - } - ], - "docstring": "Perform Affinity Propagation Clustering of data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndamping : float, default=0.5\n Damping factor (between 0.5 and 1) is the extent to\n which the current value is maintained relative to\n incoming values (weighted 1 - damping). 
This in order\n to avoid numerical oscillations when updating these\n values (messages).\n\nmax_iter : int, default=200\n Maximum number of iterations.\n\nconvergence_iter : int, default=15\n Number of iterations with no change in the number\n of estimated clusters that stops the convergence.\n\ncopy : bool, default=True\n Make a copy of input data.\n\npreference : array-like of shape (n_samples,) or float, default=None\n Preferences for each point - points with larger values of\n preferences are more likely to be chosen as exemplars. The number\n of exemplars, ie of clusters, is influenced by the input\n preferences value. If the preferences are not passed as arguments,\n they will be set to the median of the input similarities.\n\naffinity : {'euclidean', 'precomputed'}, default='euclidean'\n Which affinity to use. At the moment 'precomputed' and\n ``euclidean`` are supported. 'euclidean' uses the\n negative squared euclidean distance between points.\n\nverbose : bool, default=False\n Whether to be verbose.\n\nrandom_state : int, RandomState instance or None, default=0\n Pseudo-random number generator to control the starting state.\n Use an int for reproducible results across function calls.\n See the :term:`Glossary `.\n\n .. 
versionadded:: 0.23\n this parameter was previously hardcoded as 0.\n\nAttributes\n----------\ncluster_centers_indices_ : ndarray of shape (n_clusters,)\n Indices of cluster centers.\n\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n Cluster centers (if affinity != ``precomputed``).\n\nlabels_ : ndarray of shape (n_samples,)\n Labels of each point.\n\naffinity_matrix_ : ndarray of shape (n_samples, n_samples)\n Stores the affinity matrix used in ``fit``.\n\nn_iter_ : int\n Number of iterations taken to converge.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_affinity_propagation.py\n`.\n\nThe algorithmic complexity of affinity propagation is quadratic\nin the number of points.\n\nWhen ``fit`` does not converge, ``cluster_centers_`` becomes an empty\narray and all training samples will be labelled as ``-1``. In addition,\n``predict`` will then label every sample as ``-1``.\n\nWhen all training samples have equal similarities and equal preferences,\nthe assignment of cluster centers and labels depends on the preference.\nIf the preference is smaller than the similarities, ``fit`` will result in\na single cluster center and label ``0`` for every sample. Otherwise, every\ntraining sample becomes its own cluster center and is assigned a unique\nlabel.\n\nReferences\n----------\n\nBrendan J. Frey and Delbert Dueck, \"Clustering by Passing Messages\nBetween Data Points\", Science Feb. 2007\n\nExamples\n--------\n>>> from sklearn.cluster import AffinityPropagation\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... 
[4, 2], [4, 4], [4, 0]])\n>>> clustering = AffinityPropagation(random_state=5).fit(X)\n>>> clustering\nAffinityPropagation(random_state=5)\n>>> clustering.labels_\narray([0, 0, 0, 1, 1, 1])\n>>> clustering.predict([[0, 0], [4, 4]])\narray([0, 1])\n>>> clustering.cluster_centers_\narray([[1, 2],\n [4, 2]])" - } - ], - "functions": [ - { - "name": "_equal_similarities_and_preferences", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "affinity_propagation", - "decorators": [], - "parameters": [ - { - "name": "S", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix of similarities between points." - }, - { - "name": "preference", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Preferences for each point - points with larger values of preferences are more likely to be chosen as exemplars. The number of exemplars, i.e. of clusters, is influenced by the input preferences value. If the preferences are not passed as arguments, they will be set to the median of the input similarities (resulting in a moderate number of clusters). For a smaller amount of clusters, this can be set to the minimum value of the similarities." - }, - { - "name": "convergence_iter", - "type": "int", - "hasDefault": true, - "default": "15", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations with no change in the number of estimated clusters that stops the convergence." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations" - }, - { - "name": "damping", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Damping factor between 0.5 and 1." 
- }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If copy is False, the affinity matrix is modified inplace by the algorithm, for memory efficiency." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the starting state. Use an int for reproducible results across function calls. See the :term:`Glossary `. .. versionadded:: 0.23 this parameter was previously hardcoded as 0." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform Affinity Propagation Clustering of data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nS : array-like of shape (n_samples, n_samples)\n Matrix of similarities between points.\n\npreference : array-like of shape (n_samples,) or float, default=None\n Preferences for each point - points with larger values of\n preferences are more likely to be chosen as exemplars. The number of\n exemplars, i.e. of clusters, is influenced by the input preferences\n value. If the preferences are not passed as arguments, they will be\n set to the median of the input similarities (resulting in a moderate\n number of clusters). 
For a smaller amount of clusters, this can be set\n to the minimum value of the similarities.\n\nconvergence_iter : int, default=15\n Number of iterations with no change in the number\n of estimated clusters that stops the convergence.\n\nmax_iter : int, default=200\n Maximum number of iterations\n\ndamping : float, default=0.5\n Damping factor between 0.5 and 1.\n\ncopy : bool, default=True\n If copy is False, the affinity matrix is modified inplace by the\n algorithm, for memory efficiency.\n\nverbose : bool, default=False\n The verbosity level.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nrandom_state : int, RandomState instance or None, default=0\n Pseudo-random number generator to control the starting state.\n Use an int for reproducible results across function calls.\n See the :term:`Glossary `.\n\n .. versionadded:: 0.23\n this parameter was previously hardcoded as 0.\n\nReturns\n-------\n\ncluster_centers_indices : ndarray of shape (n_clusters,)\n Index of clusters centers.\n\nlabels : ndarray of shape (n_samples,)\n Cluster labels for each point.\n\nn_iter : int\n Number of iterations run. Returned only if `return_n_iter` is\n set to True.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_affinity_propagation.py\n`.\n\nWhen the algorithm does not converge, it returns an empty array as\n``cluster_center_indices`` and ``-1`` as label for each training sample.\n\nWhen all training samples have equal similarities and equal preferences,\nthe assignment of cluster centers and labels depends on the preference.\nIf the preference is smaller than the similarities, a single cluster center\nand label ``0`` for every sample will be returned. Otherwise, every\ntraining sample becomes its own cluster center and is assigned a unique\nlabel.\n\nReferences\n----------\nBrendan J. Frey and Delbert Dueck, \"Clustering by Passing Messages\nBetween Data Points\", Science Feb. 
2007" - } - ] - }, - { - "name": "sklearn.cluster._agglomerative", - "imports": [ - "import warnings", - "from heapq import heapify", - "from heapq import heappop", - "from heapq import heappush", - "from heapq import heappushpop", - "import numpy as np", - "from scipy import sparse", - "from scipy.sparse.csgraph import connected_components", - "from base import BaseEstimator", - "from base import ClusterMixin", - "from metrics.pairwise import paired_distances", - "from metrics.pairwise import pairwise_distances", - "from neighbors import DistanceMetric", - "from neighbors._dist_metrics import METRIC_MAPPING", - "from utils import check_array", - "from utils._fast_dict import IntFloatDict", - "from utils.fixes import _astype_copy_false", - "from utils.validation import _deprecate_positional_args", - "from utils.validation import check_memory", - "from None import _hierarchical_fast as _hierarchical", - "from _feature_agglomeration import AgglomerationTransform", - "from scipy.sparse.csgraph import minimum_spanning_tree", - "from scipy.cluster import hierarchy" - ], - "classes": [ - { - "name": "AgglomerativeClustering", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_clusters", - "type": "Optional[int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of clusters to find. It must be ``None`` if ``distance_threshold`` is not ``None``." - }, - { - "name": "affinity", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\", \"manhattan\", \"cosine\", or \"precomputed\". If linkage is \"ward\", only \"euclidean\" is accepted. If \"precomputed\", a distance matrix (instead of a similarity matrix) is needed as input for the fit method." 
- }, - { - "name": "memory", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to cache the output of the computation of the tree. By default, no caching is done. If a string is given, it is the path to the caching directory." - }, - { - "name": "connectivity", - "type": "Union[Callable, ArrayLike]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Connectivity matrix. Defines for each sample the neighboring samples following a given structure of the data. This can be a connectivity matrix itself or a callable that transforms the data into a connectivity matrix, such as derived from kneighbors_graph. Default is ``None``, i.e, the hierarchical clustering algorithm is unstructured." - }, - { - "name": "compute_full_tree", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Stop early the construction of the tree at ``n_clusters``. This is useful to decrease computation time if the number of clusters is not small compared to the number of samples. This option is useful only when specifying a connectivity matrix. Note also that when varying the number of clusters and using caching, it may be advantageous to compute the full tree. It must be ``True`` if ``distance_threshold`` is not ``None``. By default `compute_full_tree` is \"auto\", which is equivalent to `True` when `distance_threshold` is not `None` or that `n_clusters` is inferior to the maximum between 100 or `0.02 * n_samples`. Otherwise, \"auto\" is equivalent to `False`." - }, - { - "name": "linkage", - "type": "Literal['ward', 'complete', 'average', 'single']", - "hasDefault": true, - "default": "'ward'", - "limitation": null, - "ignored": false, - "docstring": "Which linkage criterion to use. The linkage criterion determines which distance to use between sets of observation. 
The algorithm will merge the pairs of cluster that minimize this criterion. - 'ward' minimizes the variance of the clusters being merged. - 'average' uses the average of the distances of each observation of the two sets. - 'complete' or 'maximum' linkage uses the maximum distances between all observations of the two sets. - 'single' uses the minimum of the distances between all observations of the two sets. .. versionadded:: 0.20 Added the 'single' option" - }, - { - "name": "distance_threshold", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The linkage distance threshold above which, clusters will not be merged. If not ``None``, ``n_clusters`` must be ``None`` and ``compute_full_tree`` must be ``True``. .. versionadded:: 0.21" - }, - { - "name": "compute_distances", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Computes distances between clusters even if `distance_threshold` is not used. This can be used to make dendrogram visualization, but introduces a computational and memory overhead. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster, or distances between instances if ``affinity='precomputed'``." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the hierarchical clustering from features, or distance matrix.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features) or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``affinity='precomputed'``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself" - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster, or distances between instances if ``affinity='precomputed'``." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the hierarchical clustering from features or distance matrix,\nand return cluster labels.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``affinity='precomputed'``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels." - } - ], - "docstring": "Agglomerative Clustering\n\nRecursively merges the pair of clusters that minimally increases\na given linkage distance.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int or None, default=2\n The number of clusters to find. It must be ``None`` if\n ``distance_threshold`` is not ``None``.\n\naffinity : str or callable, default='euclidean'\n Metric used to compute the linkage. 
Can be \"euclidean\", \"l1\", \"l2\",\n \"manhattan\", \"cosine\", or \"precomputed\".\n If linkage is \"ward\", only \"euclidean\" is accepted.\n If \"precomputed\", a distance matrix (instead of a similarity matrix)\n is needed as input for the fit method.\n\nmemory : str or object with the joblib.Memory interface, default=None\n Used to cache the output of the computation of the tree.\n By default, no caching is done. If a string is given, it is the\n path to the caching directory.\n\nconnectivity : array-like or callable, default=None\n Connectivity matrix. Defines for each sample the neighboring\n samples following a given structure of the data.\n This can be a connectivity matrix itself or a callable that transforms\n the data into a connectivity matrix, such as derived from\n kneighbors_graph. Default is ``None``, i.e, the\n hierarchical clustering algorithm is unstructured.\n\ncompute_full_tree : 'auto' or bool, default='auto'\n Stop early the construction of the tree at ``n_clusters``. This is\n useful to decrease computation time if the number of clusters is not\n small compared to the number of samples. This option is useful only\n when specifying a connectivity matrix. Note also that when varying the\n number of clusters and using caching, it may be advantageous to compute\n the full tree. It must be ``True`` if ``distance_threshold`` is not\n ``None``. By default `compute_full_tree` is \"auto\", which is equivalent\n to `True` when `distance_threshold` is not `None` or that `n_clusters`\n is inferior to the maximum between 100 or `0.02 * n_samples`.\n Otherwise, \"auto\" is equivalent to `False`.\n\nlinkage : {'ward', 'complete', 'average', 'single'}, default='ward'\n Which linkage criterion to use. The linkage criterion determines which\n distance to use between sets of observation. 
The algorithm will merge\n the pairs of cluster that minimize this criterion.\n\n - 'ward' minimizes the variance of the clusters being merged.\n - 'average' uses the average of the distances of each observation of\n the two sets.\n - 'complete' or 'maximum' linkage uses the maximum distances between\n all observations of the two sets.\n - 'single' uses the minimum of the distances between all observations\n of the two sets.\n\n .. versionadded:: 0.20\n Added the 'single' option\n\ndistance_threshold : float, default=None\n The linkage distance threshold above which, clusters will not be\n merged. If not ``None``, ``n_clusters`` must be ``None`` and\n ``compute_full_tree`` must be ``True``.\n\n .. versionadded:: 0.21\n\ncompute_distances : bool, default=False\n Computes distances between clusters even if `distance_threshold` is not\n used. This can be used to make dendrogram visualization, but introduces\n a computational and memory overhead.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nn_clusters_ : int\n The number of clusters found by the algorithm. If\n ``distance_threshold=None``, it will be equal to the given\n ``n_clusters``.\n\nlabels_ : ndarray of shape (n_samples)\n cluster labels for each point\n\nn_leaves_ : int\n Number of leaves in the hierarchical tree.\n\nn_connected_components_ : int\n The estimated number of connected components in the graph.\n\n .. versionadded:: 0.21\n ``n_connected_components_`` was added to replace ``n_components_``.\n\nchildren_ : array-like of shape (n_samples-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. 
Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\ndistances_ : array-like of shape (n_nodes-1,)\n Distances between nodes in the corresponding place in `children_`.\n Only computed if `distance_threshold` is used or `compute_distances`\n is set to `True`.\n\nExamples\n--------\n>>> from sklearn.cluster import AgglomerativeClustering\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... [4, 2], [4, 4], [4, 0]])\n>>> clustering = AgglomerativeClustering().fit(X)\n>>> clustering\nAgglomerativeClustering()\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])" - }, - { - "name": "FeatureAgglomeration", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of clusters to find. It must be ``None`` if ``distance_threshold`` is not ``None``." - }, - { - "name": "affinity", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\", \"manhattan\", \"cosine\", or 'precomputed'. If linkage is \"ward\", only \"euclidean\" is accepted." - }, - { - "name": "memory", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to cache the output of the computation of the tree. By default, no caching is done. If a string is given, it is the path to the caching directory." - }, - { - "name": "connectivity", - "type": "Union[Callable, ArrayLike]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Connectivity matrix. Defines for each feature the neighboring features following a given structure of the data. 
This can be a connectivity matrix itself or a callable that transforms the data into a connectivity matrix, such as derived from kneighbors_graph. Default is None, i.e, the hierarchical clustering algorithm is unstructured." - }, - { - "name": "compute_full_tree", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Stop early the construction of the tree at n_clusters. This is useful to decrease computation time if the number of clusters is not small compared to the number of features. This option is useful only when specifying a connectivity matrix. Note also that when varying the number of clusters and using caching, it may be advantageous to compute the full tree. It must be ``True`` if ``distance_threshold`` is not ``None``. By default `compute_full_tree` is \"auto\", which is equivalent to `True` when `distance_threshold` is not `None` or that `n_clusters` is inferior to the maximum between 100 or `0.02 * n_samples`. Otherwise, \"auto\" is equivalent to `False`." - }, - { - "name": "linkage", - "type": "Literal['ward', 'complete', 'average', 'single']", - "hasDefault": true, - "default": "'ward'", - "limitation": null, - "ignored": false, - "docstring": "Which linkage criterion to use. The linkage criterion determines which distance to use between sets of features. The algorithm will merge the pairs of cluster that minimize this criterion. - ward minimizes the variance of the clusters being merged. - average uses the average of the distances of each feature of the two sets. - complete or maximum linkage uses the maximum distances between all features of the two sets. - single uses the minimum of the distances between all observations of the two sets." 
- }, - { - "name": "pooling_func", - "type": "Callable", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "This combines the values of agglomerated features into a single value, and should accept an array of shape [M, N] and the keyword argument `axis=1`, and reduce it to an array of size [M]." - }, - { - "name": "distance_threshold", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The linkage distance threshold above which, clusters will not be merged. If not ``None``, ``n_clusters`` must be ``None`` and ``compute_full_tree`` must be ``True``. .. versionadded:: 0.21" - }, - { - "name": "compute_distances", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Computes distances between clusters even if `distance_threshold` is not used. This can be used to make dendrogram visualization, but introduces a computational and memory overhead. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the hierarchical clustering on the data\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data\n\ny : Ignored\n\nReturns\n-------\nself" - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Agglomerate features.\n\nSimilar to AgglomerativeClustering, but recursively merges features\ninstead of samples.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int, default=2\n The number of clusters to find. It must be ``None`` if\n ``distance_threshold`` is not ``None``.\n\naffinity : str or callable, default='euclidean'\n Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n \"manhattan\", \"cosine\", or 'precomputed'.\n If linkage is \"ward\", only \"euclidean\" is accepted.\n\nmemory : str or object with the joblib.Memory interface, default=None\n Used to cache the output of the computation of the tree.\n By default, no caching is done. If a string is given, it is the\n path to the caching directory.\n\nconnectivity : array-like or callable, default=None\n Connectivity matrix. Defines for each feature the neighboring\n features following a given structure of the data.\n This can be a connectivity matrix itself or a callable that transforms\n the data into a connectivity matrix, such as derived from\n kneighbors_graph. 
Default is None, i.e, the\n hierarchical clustering algorithm is unstructured.\n\ncompute_full_tree : 'auto' or bool, default='auto'\n Stop early the construction of the tree at n_clusters. This is useful\n to decrease computation time if the number of clusters is not small\n compared to the number of features. This option is useful only when\n specifying a connectivity matrix. Note also that when varying the\n number of clusters and using caching, it may be advantageous to compute\n the full tree. It must be ``True`` if ``distance_threshold`` is not\n ``None``. By default `compute_full_tree` is \"auto\", which is equivalent\n to `True` when `distance_threshold` is not `None` or that `n_clusters`\n is inferior to the maximum between 100 or `0.02 * n_samples`.\n Otherwise, \"auto\" is equivalent to `False`.\n\nlinkage : {'ward', 'complete', 'average', 'single'}, default='ward'\n Which linkage criterion to use. The linkage criterion determines which\n distance to use between sets of features. The algorithm will merge\n the pairs of cluster that minimize this criterion.\n\n - ward minimizes the variance of the clusters being merged.\n - average uses the average of the distances of each feature of\n the two sets.\n - complete or maximum linkage uses the maximum distances between\n all features of the two sets.\n - single uses the minimum of the distances between all observations\n of the two sets.\n\npooling_func : callable, default=np.mean\n This combines the values of agglomerated features into a single\n value, and should accept an array of shape [M, N] and the keyword\n argument `axis=1`, and reduce it to an array of size [M].\n\ndistance_threshold : float, default=None\n The linkage distance threshold above which, clusters will not be\n merged. If not ``None``, ``n_clusters`` must be ``None`` and\n ``compute_full_tree`` must be ``True``.\n\n .. 
versionadded:: 0.21\n\ncompute_distances : bool, default=False\n Computes distances between clusters even if `distance_threshold` is not\n used. This can be used to make dendrogram visualization, but introduces\n a computational and memory overhead.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nn_clusters_ : int\n The number of clusters found by the algorithm. If\n ``distance_threshold=None``, it will be equal to the given\n ``n_clusters``.\n\nlabels_ : array-like of (n_features,)\n cluster labels for each feature.\n\nn_leaves_ : int\n Number of leaves in the hierarchical tree.\n\nn_connected_components_ : int\n The estimated number of connected components in the graph.\n\n .. versionadded:: 0.21\n ``n_connected_components_`` was added to replace ``n_components_``.\n\nchildren_ : array-like of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_features`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_features` is a non-leaf\n node and has children `children_[i - n_features]`. 
Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_features + i`\n\ndistances_ : array-like of shape (n_nodes-1,)\n Distances between nodes in the corresponding place in `children_`.\n Only computed if `distance_threshold` is used or `compute_distances`\n is set to `True`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets, cluster\n>>> digits = datasets.load_digits()\n>>> images = digits.images\n>>> X = np.reshape(images, (len(images), -1))\n>>> agglo = cluster.FeatureAgglomeration(n_clusters=32)\n>>> agglo.fit(X)\nFeatureAgglomeration(n_clusters=32)\n>>> X_reduced = agglo.transform(X)\n>>> X_reduced.shape\n(1797, 32)" - } - ], - "functions": [ - { - "name": "_fix_connectivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fixes the connectivity matrix\n\n - copies it\n - makes it symmetric\n - converts it to LIL if necessary\n - completes it if necessary" - }, - { - "name": "_single_linkage_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform single linkage clustering on sparse data via the minimum\nspanning tree from scipy.sparse.csgraph, then using union-find to label.\nThe parent array is then generated by walking through the tree." - }, - { - "name": "ward_tree", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "feature matrix representing n_samples samples to be clustered" - }, - { - "name": "connectivity", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "connectivity matrix. Defines for each sample the neighboring samples following a given structure of the data. The matrix is assumed to be symmetric and only the upper triangular half is used. 
Default is None, i.e, the Ward algorithm is unstructured." - }, - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Stop early the construction of the tree at n_clusters. This is useful to decrease computation time if the number of clusters is not small compared to the number of samples. In this case, the complete tree is not computed, thus the 'children' output is of limited use, and the 'parents' output should rather be used. This option is valid only when specifying a connectivity matrix." - }, - { - "name": "return_distance", - "type": "bool", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If True, return the distance between the clusters." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ward clustering based on a Feature matrix.\n\nRecursively merges the pair of clusters that minimally increases\nwithin-cluster variance.\n\nThe inertia matrix uses a Heapq-based representation.\n\nThis is the structured version, that takes into account some topological\nstructure between samples.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n feature matrix representing n_samples samples to be clustered\n\nconnectivity : sparse matrix, default=None\n connectivity matrix. Defines for each sample the neighboring samples\n following a given structure of the data. The matrix is assumed to\n be symmetric and only the upper triangular half is used.\n Default is None, i.e, the Ward algorithm is unstructured.\n\nn_clusters : int, default=None\n Stop early the construction of the tree at n_clusters. This is\n useful to decrease computation time if the number of clusters is\n not small compared to the number of samples. 
In this case, the\n complete tree is not computed, thus the 'children' output is of\n limited use, and the 'parents' output should rather be used.\n This option is valid only when specifying a connectivity matrix.\n\nreturn_distance : bool, default=None\n If True, return the distance between the clusters.\n\nReturns\n-------\nchildren : ndarray of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\nn_connected_components : int\n The number of connected components in the graph.\n\nn_leaves : int\n The number of leaves in the tree\n\nparents : ndarray of shape (n_nodes,) or None\n The parent of each node. Only returned when a connectivity matrix\n is specified, elsewhere 'None' is returned.\n\ndistances : ndarray of shape (n_nodes-1,)\n Only returned if return_distance is set to True (for compatibility).\n The distances between the centers of the nodes. `distances[i]`\n corresponds to a weighted euclidean distance between\n the nodes `children[i, 1]` and `children[i, 2]`. If the nodes refer to\n leaves of the tree, then `distances[i]` is their unweighted euclidean\n distance. Distances are updated in the following way\n (from scipy.hierarchy.linkage):\n\n The new entry :math:`d(u,v)` is computed as follows,\n\n .. math::\n\n d(u,v) = \\sqrt{\\frac{|v|+|s|}\n {T}d(v,s)^2\n + \\frac{|v|+|t|}\n {T}d(v,t)^2\n - \\frac{|v|}\n {T}d(s,t)^2}\n\n where :math:`u` is the newly joined cluster consisting of\n clusters :math:`s` and :math:`t`, :math:`v` is an unused\n cluster in the forest, :math:`T=|v|+|s|+|t|`, and\n :math:`|*|` is the cardinality of its argument. This is also\n known as the incremental algorithm." 
- }, - { - "name": "linkage_tree", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "feature matrix representing n_samples samples to be clustered" - }, - { - "name": "connectivity", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "connectivity matrix. Defines for each sample the neighboring samples following a given structure of the data. The matrix is assumed to be symmetric and only the upper triangular half is used. Default is None, i.e, the Ward algorithm is unstructured." - }, - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Stop early the construction of the tree at n_clusters. This is useful to decrease computation time if the number of clusters is not small compared to the number of samples. In this case, the complete tree is not computed, thus the 'children' output is of limited use, and the 'parents' output should rather be used. This option is valid only when specifying a connectivity matrix." - }, - { - "name": "linkage", - "type": "Literal[\"average\", \"complete\", \"single\"]", - "hasDefault": true, - "default": "\"complete\"", - "limitation": null, - "ignored": false, - "docstring": "Which linkage criteria to use. The linkage criterion determines which distance to use between sets of observation. - average uses the average of the distances of each observation of the two sets - complete or maximum linkage uses the maximum distances between all observations of the two sets. - single uses the minimum of the distances between all observations of the two sets." - }, - { - "name": "affinity", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "\"euclidean\"", - "limitation": null, - "ignored": false, - "docstring": "which metric to use. 
Can be \"euclidean\", \"manhattan\", or any distance know to paired distance (see metric.pairwise)" - }, - { - "name": "return_distance", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "whether or not to return the distances between the clusters." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Linkage agglomerative clustering based on a Feature matrix.\n\nThe inertia matrix uses a Heapq-based representation.\n\nThis is the structured version, that takes into account some topological\nstructure between samples.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n feature matrix representing n_samples samples to be clustered\n\nconnectivity : sparse matrix, default=None\n connectivity matrix. Defines for each sample the neighboring samples\n following a given structure of the data. The matrix is assumed to\n be symmetric and only the upper triangular half is used.\n Default is None, i.e, the Ward algorithm is unstructured.\n\nn_clusters : int, default=None\n Stop early the construction of the tree at n_clusters. This is\n useful to decrease computation time if the number of clusters is\n not small compared to the number of samples. In this case, the\n complete tree is not computed, thus the 'children' output is of\n limited use, and the 'parents' output should rather be used.\n This option is valid only when specifying a connectivity matrix.\n\nlinkage : {\"average\", \"complete\", \"single\"}, default=\"complete\"\n Which linkage criteria to use. 
The linkage criterion determines which\n distance to use between sets of observation.\n - average uses the average of the distances of each observation of\n the two sets\n - complete or maximum linkage uses the maximum distances between\n all observations of the two sets.\n - single uses the minimum of the distances between all observations\n of the two sets.\n\naffinity : str or callable, default=\"euclidean\".\n which metric to use. Can be \"euclidean\", \"manhattan\", or any\n distance know to paired distance (see metric.pairwise)\n\nreturn_distance : bool, default=False\n whether or not to return the distances between the clusters.\n\nReturns\n-------\nchildren : ndarray of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\nn_connected_components : int\n The number of connected components in the graph.\n\nn_leaves : int\n The number of leaves in the tree.\n\nparents : ndarray of shape (n_nodes, ) or None\n The parent of each node. Only returned when a connectivity matrix\n is specified, elsewhere 'None' is returned.\n\ndistances : ndarray of shape (n_nodes-1,)\n Returned when return_distance is set to True.\n\n distances[i] refers to the distance between children[i][0] and\n children[i][1] when they are merged.\n\nSee Also\n--------\nward_tree : Hierarchical clustering with ward linkage." 
- }, - { - "name": "_complete_linkage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_average_linkage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_single_linkage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_hc_cut", - "decorators": [], - "parameters": [ - { - "name": "n_clusters", - "type": "Union[NDArray, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of clusters to form." - }, - { - "name": "children", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The children of each non-leaf node. Values less than `n_samples` correspond to leaves of the tree which are the original samples. A node `i` greater than or equal to `n_samples` is a non-leaf node and has children `children_[i - n_samples]`. Alternatively at the i-th iteration, children[i][0] and children[i][1] are merged to form node `n_samples + i`" - }, - { - "name": "n_leaves", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of leaves of the tree." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Function cutting the ward tree for a given number of clusters.\n\nParameters\n----------\nn_clusters : int or ndarray\n The number of clusters to form.\n\nchildren : ndarray of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. 
Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\nn_leaves : int\n Number of leaves of the tree.\n\nReturns\n-------\nlabels : array [n_samples]\n cluster labels for each point" - } - ] - }, - { - "name": "sklearn.cluster._bicluster", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "import warnings", - "import numpy as np", - "from scipy.linalg import norm", - "from scipy.sparse import dia_matrix", - "from scipy.sparse import issparse", - "from scipy.sparse.linalg import eigsh", - "from scipy.sparse.linalg import svds", - "from None import KMeans", - "from None import MiniBatchKMeans", - "from base import BaseEstimator", - "from base import BiclusterMixin", - "from utils import check_random_state", - "from utils.extmath import make_nonnegative", - "from utils.extmath import randomized_svd", - "from utils.extmath import safe_sparse_dot", - "from utils.validation import assert_all_finite", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "BaseSpectral", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Creates a biclustering for X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\ny : 
Ignored" - }, - { - "name": "_svd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns first `n_components` left and right singular\nvectors u and v, discarding the first `n_discard`." - }, - { - "name": "_k_means", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for spectral biclustering." - }, - { - "name": "SpectralCoclustering", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The number of biclusters to find." - }, - { - "name": "svd_method", - "type": "Literal['randomized', 'arpack']", - "hasDefault": true, - "default": "'randomized'", - "limitation": null, - "ignored": false, - "docstring": "Selects the algorithm for finding singular vectors. May be 'randomized' or 'arpack'. If 'randomized', use :func:`sklearn.utils.extmath.randomized_svd`, which may be faster for large matrices. If 'arpack', use :func:`scipy.sparse.linalg.svds`, which is more accurate, but possibly slower in some cases." - }, - { - "name": "n_svd_vecs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of vectors to use in calculating the SVD. Corresponds to `ncv` when `svd_method=arpack` and `n_oversamples` when `svd_method` is 'randomized`." - }, - { - "name": "mini_batch", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use mini-batch k-means, which is faster but may get different results." 
- }, - { - "name": "init", - "type": null, - "hasDefault": true, - "default": "'k-means++'", - "limitation": null, - "ignored": false, - "docstring": "Method for initialization of k-means algorithm; defaults to 'k-means++'." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of random initializations that are tried with the k-means algorithm. If mini-batch k-means is used, the best initialization is chosen and the algorithm runs once. Otherwise, the algorithm is run for each initialization and the best solution chosen." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. deprecated:: 0.23 ``n_jobs`` was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25)." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for randomizing the singular value decomposition and the k-means initialization. Use an int to make the randomness deterministic. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Spectral Co-Clustering algorithm (Dhillon, 2001).\n\nClusters rows and columns of an array `X` to solve the relaxed\nnormalized cut of the bipartite graph created from `X` as follows:\nthe edge between row vertex `i` and column vertex `j` has weight\n`X[i, j]`.\n\nThe resulting bicluster structure is block-diagonal, since each\nrow and each column belongs to exactly one bicluster.\n\nSupports sparse matrices, as long as they are nonnegative.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int, default=3\n The number of biclusters to find.\n\nsvd_method : {'randomized', 'arpack'}, default='randomized'\n Selects the algorithm for finding singular vectors. May be\n 'randomized' or 'arpack'. If 'randomized', use\n :func:`sklearn.utils.extmath.randomized_svd`, which may be faster\n for large matrices. If 'arpack', use\n :func:`scipy.sparse.linalg.svds`, which is more accurate, but\n possibly slower in some cases.\n\nn_svd_vecs : int, default=None\n Number of vectors to use in calculating the SVD. Corresponds\n to `ncv` when `svd_method=arpack` and `n_oversamples` when\n `svd_method` is 'randomized`.\n\nmini_batch : bool, default=False\n Whether to use mini-batch k-means, which is faster but may get\n different results.\n\ninit : {'k-means++', 'random', or ndarray of shape (n_clusters, n_features), default='k-means++'\n Method for initialization of k-means algorithm; defaults to\n 'k-means++'.\n\nn_init : int, default=10\n Number of random initializations that are tried with the\n k-means algorithm.\n\n If mini-batch k-means is used, the best initialization is\n chosen and the algorithm runs once. 
Otherwise, the algorithm\n is run for each initialization and the best solution chosen.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\nrandom_state : int, RandomState instance, default=None\n Used for randomizing the singular value decomposition and the k-means\n initialization. Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\nAttributes\n----------\nrows_ : array-like of shape (n_row_clusters, n_rows)\n Results of the clustering. `rows[i, r]` is True if\n cluster `i` contains row `r`. Available only after calling ``fit``.\n\ncolumns_ : array-like of shape (n_column_clusters, n_columns)\n Results of the clustering, like `rows`.\n\nrow_labels_ : array-like of shape (n_rows,)\n The bicluster label of each row.\n\ncolumn_labels_ : array-like of shape (n_cols,)\n The bicluster label of each column.\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralCoclustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n... [4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralCoclustering(n_clusters=2, random_state=0).fit(X)\n>>> clustering.row_labels_ #doctest: +SKIP\narray([0, 1, 1, 0, 0, 0], dtype=int32)\n>>> clustering.column_labels_ #doctest: +SKIP\narray([0, 0], dtype=int32)\n>>> clustering\nSpectralCoclustering(n_clusters=2, random_state=0)\n\nReferences\n----------\n\n* Dhillon, Inderjit S, 2001. `Co-clustering documents and words using\n bipartite spectral graph partitioning\n `__." 
- }, - { - "name": "SpectralBiclustering", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_clusters", - "type": "Union[Tuple[], int]", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The number of row and column clusters in the checkerboard structure." - }, - { - "name": "method", - "type": "Literal['bistochastic', 'scale', 'log']", - "hasDefault": true, - "default": "'bistochastic'", - "limitation": null, - "ignored": false, - "docstring": "Method of normalizing and converting singular vectors into biclusters. May be one of 'scale', 'bistochastic', or 'log'. The authors recommend using 'log'. If the data is sparse, however, log normalization will not work, which is why the default is 'bistochastic'. .. warning:: if `method='log'`, the data must be sparse." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "6", - "limitation": null, - "ignored": false, - "docstring": "Number of singular vectors to check." - }, - { - "name": "n_best", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of best singular vectors to which to project the data for clustering." - }, - { - "name": "svd_method", - "type": "Literal['randomized', 'arpack']", - "hasDefault": true, - "default": "'randomized'", - "limitation": null, - "ignored": false, - "docstring": "Selects the algorithm for finding singular vectors. May be 'randomized' or 'arpack'. If 'randomized', uses :func:`~sklearn.utils.extmath.randomized_svd`, which may be faster for large matrices. If 'arpack', uses `scipy.sparse.linalg.svds`, which is more accurate, but possibly slower in some cases." 
- }, - { - "name": "n_svd_vecs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of vectors to use in calculating the SVD. Corresponds to `ncv` when `svd_method=arpack` and `n_oversamples` when `svd_method` is 'randomized`." - }, - { - "name": "mini_batch", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use mini-batch k-means, which is faster but may get different results." - }, - { - "name": "init", - "type": "Literal['k-means++', 'random']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Method for initialization of k-means algorithm; defaults to 'k-means++'." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of random initializations that are tried with the k-means algorithm. If mini-batch k-means is used, the best initialization is chosen and the algorithm runs once. Otherwise, the algorithm is run for each initialization and the best solution chosen." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. deprecated:: 0.23 ``n_jobs`` was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25)." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for randomizing the singular value decomposition and the k-means initialization. 
Use an int to make the randomness deterministic. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_best_piecewise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the ``n_best`` vectors that are best approximated by piecewise\nconstant vectors.\n\nThe piecewise vectors are found by k-means; the best is chosen\naccording to Euclidean distance." - }, - { - "name": "_project_and_cluster", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Project ``data`` to ``vectors`` and cluster the result." - } - ], - "docstring": "Spectral biclustering (Kluger, 2003).\n\nPartitions rows and columns under the assumption that the data has\nan underlying checkerboard structure. For instance, if there are\ntwo row partitions and three column partitions, each row will\nbelong to three biclusters, and each column will belong to two\nbiclusters. The outer product of the corresponding row and column\nlabel vectors gives this checkerboard structure.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int or tuple (n_row_clusters, n_column_clusters), default=3\n The number of row and column clusters in the checkerboard\n structure.\n\nmethod : {'bistochastic', 'scale', 'log'}, default='bistochastic'\n Method of normalizing and converting singular vectors into\n biclusters. May be one of 'scale', 'bistochastic', or 'log'.\n The authors recommend using 'log'. If the data is sparse,\n however, log normalization will not work, which is why the\n default is 'bistochastic'.\n\n .. 
warning::\n if `method='log'`, the data must be sparse.\n\nn_components : int, default=6\n Number of singular vectors to check.\n\nn_best : int, default=3\n Number of best singular vectors to which to project the data\n for clustering.\n\nsvd_method : {'randomized', 'arpack'}, default='randomized'\n Selects the algorithm for finding singular vectors. May be\n 'randomized' or 'arpack'. If 'randomized', uses\n :func:`~sklearn.utils.extmath.randomized_svd`, which may be faster\n for large matrices. If 'arpack', uses\n `scipy.sparse.linalg.svds`, which is more accurate, but\n possibly slower in some cases.\n\nn_svd_vecs : int, default=None\n Number of vectors to use in calculating the SVD. Corresponds\n to `ncv` when `svd_method=arpack` and `n_oversamples` when\n `svd_method` is 'randomized`.\n\nmini_batch : bool, default=False\n Whether to use mini-batch k-means, which is faster but may get\n different results.\n\ninit : {'k-means++', 'random'} or ndarray of (n_clusters, n_features), default='k-means++'\n Method for initialization of k-means algorithm; defaults to\n 'k-means++'.\n\nn_init : int, default=10\n Number of random initializations that are tried with the\n k-means algorithm.\n\n If mini-batch k-means is used, the best initialization is\n chosen and the algorithm runs once. Otherwise, the algorithm\n is run for each initialization and the best solution chosen.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\nrandom_state : int, RandomState instance, default=None\n Used for randomizing the singular value decomposition and the k-means\n initialization. 
Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\nAttributes\n----------\nrows_ : array-like of shape (n_row_clusters, n_rows)\n Results of the clustering. `rows[i, r]` is True if\n cluster `i` contains row `r`. Available only after calling ``fit``.\n\ncolumns_ : array-like of shape (n_column_clusters, n_columns)\n Results of the clustering, like `rows`.\n\nrow_labels_ : array-like of shape (n_rows,)\n Row partition labels.\n\ncolumn_labels_ : array-like of shape (n_cols,)\n Column partition labels.\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralBiclustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n... [4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralBiclustering(n_clusters=2, random_state=0).fit(X)\n>>> clustering.row_labels_\narray([1, 1, 1, 0, 0, 0], dtype=int32)\n>>> clustering.column_labels_\narray([0, 1], dtype=int32)\n>>> clustering\nSpectralBiclustering(n_clusters=2, random_state=0)\n\nReferences\n----------\n\n* Kluger, Yuval, et. al., 2003. `Spectral biclustering of microarray\n data: coclustering genes and conditions\n `__." - } - ], - "functions": [ - { - "name": "_scale_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Normalize ``X`` by scaling rows and columns independently.\n\nReturns the normalized matrix and the row and column scaling\nfactors." - }, - { - "name": "_bistochastic_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Normalize rows and columns of ``X`` simultaneously so that all\nrows sum to one constant and all columns sum to a different\nconstant." - }, - { - "name": "_log_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Normalize ``X`` according to Kluger's log-interactions scheme." 
- } - ] - }, - { - "name": "sklearn.cluster._birch", - "imports": [ - "import warnings", - "import numbers", - "import numpy as np", - "from scipy import sparse", - "from math import sqrt", - "from metrics import pairwise_distances_argmin", - "from metrics.pairwise import euclidean_distances", - "from base import TransformerMixin", - "from base import ClusterMixin", - "from base import BaseEstimator", - "from utils.extmath import row_norms", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from exceptions import ConvergenceWarning", - "from None import AgglomerativeClustering", - "from _config import config_context" - ], - "classes": [ - { - "name": "_CFNode", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "threshold", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold needed for a new subcluster to enter a CFSubcluster." - }, - { - "name": "branching_factor", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of CF subclusters in each node." - }, - { - "name": "is_leaf", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "We need to know if the CFNode is a leaf or not, in order to retrieve the final subclusters." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "append_subcluster", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update_split_subclusters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Remove a subcluster from a node and update it with the\nsplit subclusters." - }, - { - "name": "insert_cf_subcluster", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Insert a new subcluster into the node." - } - ], - "docstring": "Each node in a CFTree is called a CFNode.\n\nThe CFNode can have a maximum of branching_factor\nnumber of CFSubclusters.\n\nParameters\n----------\nthreshold : float\n Threshold needed for a new subcluster to enter a CFSubcluster.\n\nbranching_factor : int\n Maximum number of CF subclusters in each node.\n\nis_leaf : bool\n We need to know if the CFNode is a leaf or not, in order to\n retrieve the final subclusters.\n\nn_features : int\n The number of features.\n\nAttributes\n----------\nsubclusters_ : list\n List of subclusters for a particular CFNode.\n\nprev_leaf_ : _CFNode\n Useful only if is_leaf is True.\n\nnext_leaf_ : _CFNode\n next_leaf. Useful only if is_leaf is True.\n the final subclusters.\n\ninit_centroids_ : ndarray of shape (branching_factor + 1, n_features)\n Manipulate ``init_centroids_`` throughout rather than centroids_ since\n the centroids are just a view of the ``init_centroids_`` .\n\ninit_sq_norm_ : ndarray of shape (branching_factor + 1,)\n manipulate init_sq_norm_ throughout. similar to ``init_centroids_``.\n\ncentroids_ : ndarray of shape (branching_factor + 1, n_features)\n View of ``init_centroids_``.\n\nsquared_norm_ : ndarray of shape (branching_factor + 1,)\n View of ``init_sq_norm_``." 
- }, - { - "name": "_CFSubcluster", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "linear_sum", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample. This is kept optional to allow initialization of empty subclusters." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "merge_subcluster", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if a cluster is worthy enough to be merged. If\nyes then merge." - }, - { - "name": "radius", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return radius of the subcluster" - } - ], - "docstring": "Each subcluster in a CFNode is called a CFSubcluster.\n\nA CFSubcluster can have a CFNode has its child.\n\nParameters\n----------\nlinear_sum : ndarray of shape (n_features,), default=None\n Sample. This is kept optional to allow initialization of empty\n subclusters.\n\nAttributes\n----------\nn_samples_ : int\n Number of samples that belong to each subcluster.\n\nlinear_sum_ : ndarray\n Linear sum of all the samples in a subcluster. Prevents holding\n all sample data in memory.\n\nsquared_sum_ : float\n Sum of the squared l2 norms of all samples belonging to a subcluster.\n\ncentroid_ : ndarray of shape (branching_factor + 1, n_features)\n Centroid of the subcluster. Prevent recomputing of centroids when\n ``CFNode.centroids_`` is called.\n\nchild_ : _CFNode\n Child Node of the subcluster. Once a given _CFNode is set as the child\n of the _CFNode, it is set to ``self.child_``.\n\nsq_norm_ : ndarray of shape (branching_factor + 1,)\n Squared norm of the subcluster. 
Used to prevent recomputing when\n pairwise minimum distances are computed." - }, - { - "name": "Birch", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "threshold", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The radius of the subcluster obtained by merging a new sample and the closest subcluster should be lesser than the threshold. Otherwise a new subcluster is started. Setting this value to be very low promotes splitting and vice-versa." - }, - { - "name": "branching_factor", - "type": "int", - "hasDefault": true, - "default": "50", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of CF subclusters in each node. If a new samples enters such that the number of subclusters exceed the branching_factor then that node is split into two nodes with the subclusters redistributed in each. The parent subcluster of that node is removed and two new subclusters are added as parents of the 2 split nodes." - }, - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of clusters after the final clustering step, which treats the subclusters from the leaves as new samples. - `None` : the final clustering step is not performed and the subclusters are returned as they are. - :mod:`sklearn.cluster` Estimator : If a model is provided, the model is fit treating the subclusters as new samples and the initial data is mapped to the label of the closest subcluster. - `int` : the model fit is :class:`AgglomerativeClustering` with `n_clusters` set to be equal to the int." - }, - { - "name": "compute_labels", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to compute labels for each fit." 
- }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to make a copy of the given data. If set to False, the initial data will be overwritten." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a CF Tree for the input data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself\n Fitted estimator." - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_leaves", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Retrieve the leaves of the CF Node.\n\nReturns\n-------\nleaves : list of shape (n_leaves,)\n List of the leaf nodes." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. If X is not provided, only the global clustering step is done." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Online learning. Prevents rebuilding of CFTree from scratch.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), default=None\n Input data. If X is not provided, only the global clustering\n step is done.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself\n Fitted estimator." - }, - { - "name": "_check_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict data using the ``centroids_`` of subclusters.\n\nAvoid computation of the row norms of X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nlabels : ndarray of shape(n_samples,)\n Labelled data." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X into subcluster centroids dimension.\n\nEach dimension represents the distance from the sample point to each\ncluster centroid.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nX_trans : {array-like, sparse matrix} of shape (n_samples, n_clusters)\n Transformed data." 
- }, - { - "name": "_global_clustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Global clustering for the subclusters obtained after fitting" - } - ], - "docstring": "Implements the Birch clustering algorithm.\n\nIt is a memory-efficient, online-learning algorithm provided as an\nalternative to :class:`MiniBatchKMeans`. It constructs a tree\ndata structure with the cluster centroids being read off the leaf.\nThese can be either the final cluster centroids or can be provided as input\nto another clustering algorithm such as :class:`AgglomerativeClustering`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16\n\nParameters\n----------\nthreshold : float, default=0.5\n The radius of the subcluster obtained by merging a new sample and the\n closest subcluster should be lesser than the threshold. Otherwise a new\n subcluster is started. Setting this value to be very low promotes\n splitting and vice-versa.\n\nbranching_factor : int, default=50\n Maximum number of CF subclusters in each node. If a new samples enters\n such that the number of subclusters exceed the branching_factor then\n that node is split into two nodes with the subclusters redistributed\n in each. 
The parent subcluster of that node is removed and two new\n subclusters are added as parents of the 2 split nodes.\n\nn_clusters : int, instance of sklearn.cluster model, default=3\n Number of clusters after the final clustering step, which treats the\n subclusters from the leaves as new samples.\n\n - `None` : the final clustering step is not performed and the\n subclusters are returned as they are.\n\n - :mod:`sklearn.cluster` Estimator : If a model is provided, the model\n is fit treating the subclusters as new samples and the initial data\n is mapped to the label of the closest subcluster.\n\n - `int` : the model fit is :class:`AgglomerativeClustering` with\n `n_clusters` set to be equal to the int.\n\ncompute_labels : bool, default=True\n Whether or not to compute labels for each fit.\n\ncopy : bool, default=True\n Whether or not to make a copy of the given data. If set to False,\n the initial data will be overwritten.\n\nAttributes\n----------\nroot_ : _CFNode\n Root of the CFTree.\n\ndummy_leaf_ : _CFNode\n Start pointer to all the leaves.\n\nsubcluster_centers_ : ndarray\n Centroids of all subclusters read directly from the leaves.\n\nsubcluster_labels_ : ndarray\n Labels assigned to the centroids of the subclusters after\n they are clustered globally.\n\nlabels_ : ndarray of shape (n_samples,)\n Array of labels assigned to the input data.\n if partial_fit is used instead of fit, they are assigned to the\n last batch of data.\n\nSee Also\n--------\nMiniBatchKMeans : Alternative implementation that does incremental updates\n of the centers' positions using mini-batches.\n\nNotes\n-----\nThe tree data structure consists of nodes with each node consisting of\na number of subclusters. The maximum number of subclusters in a node\nis determined by the branching factor. 
Each subcluster maintains a\nlinear sum, squared sum and the number of samples in that subcluster.\nIn addition, each subcluster can also have a node as its child, if the\nsubcluster is not a member of a leaf node.\n\nFor a new point entering the root, it is merged with the subcluster closest\nto it and the linear sum, squared sum and the number of samples of that\nsubcluster are updated. This is done recursively till the properties of\nthe leaf node are updated.\n\nReferences\n----------\n* Tian Zhang, Raghu Ramakrishnan, Maron Livny\n BIRCH: An efficient data clustering method for large databases.\n https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf\n\n* Roberto Perdisci\n JBirch - Java implementation of BIRCH clustering algorithm\n https://code.google.com/archive/p/jbirch\n\nExamples\n--------\n>>> from sklearn.cluster import Birch\n>>> X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]\n>>> brc = Birch(n_clusters=None)\n>>> brc.fit(X)\nBirch(n_clusters=None)\n>>> brc.predict(X)\narray([0, 0, 0, 1, 1, 1])" - } - ], - "functions": [ - { - "name": "_iterate_sparse_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "This little hack returns a densified row when iterating over a sparse\nmatrix, instead of constructing a sparse matrix for every row that is\nexpensive." - }, - { - "name": "_split_node", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The node has to be split if there is no place for a new subcluster\nin the node.\n1. Two empty nodes and two empty subclusters are initialized.\n2. The pair of distant subclusters are found.\n3. The properties of the empty subclusters and nodes are updated\n according to the nearest distance between the subclusters to the\n pair of distant subclusters.\n4. The two nodes are set as children to the two subclusters." 
- } - ] - }, - { - "name": "sklearn.cluster._dbscan", - "imports": [ - "import numpy as np", - "import warnings", - "from scipy import sparse", - "from base import BaseEstimator", - "from base import ClusterMixin", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from neighbors import NearestNeighbors", - "from _dbscan_inner import dbscan_inner" - ], - "classes": [ - { - "name": "DBSCAN", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The maximum distance between two samples for one to be considered as in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function." - }, - { - "name": "min_samples", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "The number of samples (or total weight) in a neighborhood for a point to be considered as a core point. This includes the point itself." - }, - { - "name": "metric", - "type": "str", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string or callable, it must be one of the options allowed by :func:`sklearn.metrics.pairwise_distances` for its metric parameter. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square. X may be a :term:`Glossary `, in which case only \"nonzero\" elements may be considered neighbors for DBSCAN. .. versionadded:: 0.17 metric *precomputed* to accept precomputed sparse matrix." 
- }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function. .. versionadded:: 0.19" - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The algorithm to be used by the NearestNeighbors module to compute pointwise distances and find nearest neighbors. See NearestNeighbors module documentation for details." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or cKDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "p", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The power of the Minkowski metric to be used to calculate distance between points. If None, then ``p=2`` (equivalent to the Euclidean distance)." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster, or distances between instances if ``metric='precomputed'``. 
If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weight of each sample, such that a sample with a weight of at least ``min_samples`` is by itself a core sample; a sample with a negative weight may inhibit its eps-neighbor from being core. Note that weights are absolute, and default to 1." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Perform DBSCAN clustering from features, or distance matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``metric='precomputed'``. If a sparse matrix is provided, it will\n be converted into a sparse ``csr_matrix``.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weight of each sample, such that a sample with a weight of at least\n ``min_samples`` is by itself a core sample; a sample with a\n negative weight may inhibit its eps-neighbor from being core.\n Note that weights are absolute, and default to 1.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself" - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster, or distances between instances if ``metric='precomputed'``. If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weight of each sample, such that a sample with a weight of at least ``min_samples`` is by itself a core sample; a sample with a negative weight may inhibit its eps-neighbor from being core. Note that weights are absolute, and default to 1." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform DBSCAN clustering from features or distance matrix,\nand return cluster labels.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``metric='precomputed'``. If a sparse matrix is provided, it will\n be converted into a sparse ``csr_matrix``.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weight of each sample, such that a sample with a weight of at least\n ``min_samples`` is by itself a core sample; a sample with a\n negative weight may inhibit its eps-neighbor from being core.\n Note that weights are absolute, and default to 1.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels. Noisy samples are given the label -1." 
- } - ], - "docstring": "Perform DBSCAN clustering from vector array or distance matrix.\n\nDBSCAN - Density-Based Spatial Clustering of Applications with Noise.\nFinds core samples of high density and expands clusters from them.\nGood for data which contains clusters of similar density.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\neps : float, default=0.5\n The maximum distance between two samples for one to be considered\n as in the neighborhood of the other. This is not a maximum bound\n on the distances of points within a cluster. This is the most\n important DBSCAN parameter to choose appropriately for your data set\n and distance function.\n\nmin_samples : int, default=5\n The number of samples (or total weight) in a neighborhood for a point\n to be considered as a core point. This includes the point itself.\n\nmetric : string, or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n its metric parameter.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square. X may be a :term:`Glossary `, in which\n case only \"nonzero\" elements may be considered neighbors for DBSCAN.\n\n .. versionadded:: 0.17\n metric *precomputed* to accept precomputed sparse matrix.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n .. versionadded:: 0.19\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n The algorithm to be used by the NearestNeighbors module\n to compute pointwise distances and find nearest neighbors.\n See NearestNeighbors module documentation for details.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or cKDTree. This can affect the speed\n of the construction and query, as well as the memory required\n to store the tree. 
The optimal value depends\n on the nature of the problem.\n\np : float, default=None\n The power of the Minkowski metric to be used to calculate distance\n between points. If None, then ``p=2`` (equivalent to the Euclidean\n distance).\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\ncore_sample_indices_ : ndarray of shape (n_core_samples,)\n Indices of core samples.\n\ncomponents_ : ndarray of shape (n_core_samples, n_features)\n Copy of each core sample found by training.\n\nlabels_ : ndarray of shape (n_samples)\n Cluster labels for each point in the dataset given to fit().\n Noisy samples are given the label -1.\n\nExamples\n--------\n>>> from sklearn.cluster import DBSCAN\n>>> import numpy as np\n>>> X = np.array([[1, 2], [2, 2], [2, 3],\n... [8, 7], [8, 8], [25, 80]])\n>>> clustering = DBSCAN(eps=3, min_samples=2).fit(X)\n>>> clustering.labels_\narray([ 0, 0, 0, 1, 1, -1])\n>>> clustering\nDBSCAN(eps=3, min_samples=2)\n\nSee Also\n--------\nOPTICS : A similar clustering at multiple values of eps. Our implementation\n is optimized for memory usage.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_dbscan.py\n`.\n\nThis implementation bulk-computes all neighborhood queries, which increases\nthe memory complexity to O(n.d) where d is the average number of neighbors,\nwhile original DBSCAN had memory complexity O(n). 
It may attract a higher\nmemory complexity when querying these nearest neighborhoods, depending\non the ``algorithm``.\n\nOne way to avoid the query complexity is to pre-compute sparse\nneighborhoods in chunks using\n:func:`NearestNeighbors.radius_neighbors_graph\n` with\n``mode='distance'``, then using ``metric='precomputed'`` here.\n\nAnother way to reduce memory and computation time is to remove\n(near-)duplicate points and use ``sample_weight`` instead.\n\n:class:`cluster.OPTICS` provides a similar clustering with lower memory\nusage.\n\nReferences\n----------\nEster, M., H. P. Kriegel, J. Sander, and X. Xu, \"A Density-Based\nAlgorithm for Discovering Clusters in Large Spatial Databases with Noise\".\nIn: Proceedings of the 2nd International Conference on Knowledge Discovery\nand Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\nSchubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).\nDBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\nACM Transactions on Database Systems (TODS), 42(3), 19." - } - ], - "functions": [ - { - "name": "dbscan", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A feature array, or array of distances between samples if ``metric='precomputed'``." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The maximum distance between two samples for one to be considered as in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function." 
- }, - { - "name": "min_samples", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "The number of samples (or total weight) in a neighborhood for a point to be considered as a core point. This includes the point itself." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string or callable, it must be one of the options allowed by :func:`sklearn.metrics.pairwise_distances` for its metric parameter. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square during fit. X may be a :term:`sparse graph `, in which case only \"nonzero\" elements may be considered neighbors." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function. .. versionadded:: 0.19" - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The algorithm to be used by the NearestNeighbors module to compute pointwise distances and find nearest neighbors. See NearestNeighbors module documentation for details." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or cKDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." 
- }, - { - "name": "p", - "type": "float", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The power of the Minkowski metric to be used to calculate distance between points." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weight of each sample, such that a sample with a weight of at least ``min_samples`` is by itself a core sample; a sample with negative weight may inhibit its eps-neighbor from being core. Note that weights are absolute, and default to 1." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. If precomputed distance are used, parallel execution is not available and thus n_jobs will have no effect." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform DBSCAN clustering from vector array or distance matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse (CSR) matrix} of shape (n_samples, n_features) or (n_samples, n_samples)\n A feature array, or array of distances between samples if\n ``metric='precomputed'``.\n\neps : float, default=0.5\n The maximum distance between two samples for one to be considered\n as in the neighborhood of the other. This is not a maximum bound\n on the distances of points within a cluster. This is the most\n important DBSCAN parameter to choose appropriately for your data set\n and distance function.\n\nmin_samples : int, default=5\n The number of samples (or total weight) in a neighborhood for a point\n to be considered as a core point. 
This includes the point itself.\n\nmetric : str or callable, default='minkowski'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n its metric parameter.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit.\n X may be a :term:`sparse graph `,\n in which case only \"nonzero\" elements may be considered neighbors.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n .. versionadded:: 0.19\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n The algorithm to be used by the NearestNeighbors module\n to compute pointwise distances and find nearest neighbors.\n See NearestNeighbors module documentation for details.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or cKDTree. This can affect the speed\n of the construction and query, as well as the memory required\n to store the tree. The optimal value depends\n on the nature of the problem.\n\np : float, default=2\n The power of the Minkowski metric to be used to calculate distance\n between points.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weight of each sample, such that a sample with a weight of at least\n ``min_samples`` is by itself a core sample; a sample with negative\n weight may inhibit its eps-neighbor from being core.\n Note that weights are absolute, and default to 1.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search. ``None`` means\n 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means\n using all processors. 
See :term:`Glossary ` for more details.\n If precomputed distance are used, parallel execution is not available\n and thus n_jobs will have no effect.\n\nReturns\n-------\ncore_samples : ndarray of shape (n_core_samples,)\n Indices of core samples.\n\nlabels : ndarray of shape (n_samples,)\n Cluster labels for each point. Noisy samples are given the label -1.\n\nSee Also\n--------\nDBSCAN : An estimator interface for this clustering algorithm.\nOPTICS : A similar estimator interface clustering at multiple values of\n eps. Our implementation is optimized for memory usage.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_dbscan.py\n`.\n\nThis implementation bulk-computes all neighborhood queries, which increases\nthe memory complexity to O(n.d) where d is the average number of neighbors,\nwhile original DBSCAN had memory complexity O(n). It may attract a higher\nmemory complexity when querying these nearest neighborhoods, depending\non the ``algorithm``.\n\nOne way to avoid the query complexity is to pre-compute sparse\nneighborhoods in chunks using\n:func:`NearestNeighbors.radius_neighbors_graph\n` with\n``mode='distance'``, then using ``metric='precomputed'`` here.\n\nAnother way to reduce memory and computation time is to remove\n(near-)duplicate points and use ``sample_weight`` instead.\n\n:func:`cluster.optics ` provides a similar\nclustering with lower memory usage.\n\nReferences\n----------\nEster, M., H. P. Kriegel, J. Sander, and X. Xu, \"A Density-Based\nAlgorithm for Discovering Clusters in Large Spatial Databases with Noise\".\nIn: Proceedings of the 2nd International Conference on Knowledge Discovery\nand Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\nSchubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).\nDBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\nACM Transactions on Database Systems (TODS), 42(3), 19." 
- } - ] - }, - { - "name": "sklearn.cluster._feature_agglomeration", - "imports": [ - "import numpy as np", - "from base import TransformerMixin", - "from utils.validation import check_is_fitted", - "from scipy.sparse import issparse" - ], - "classes": [ - { - "name": "AgglomerationTransform", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A M by N array of M observations in N dimensions or a length M array of M one-dimensional observations." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform a new matrix using the built clustering\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or (n_samples,)\n A M by N array of M observations in N dimensions or a length\n M array of M one-dimensional observations.\n\nReturns\n-------\nY : ndarray of shape (n_samples, n_clusters) or (n_clusters,)\n The pooled values for each feature cluster." 
- }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "Xred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The values to be assigned to each cluster of samples" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inverse the transformation.\nReturn a vector of size nb_features with the values of Xred assigned\nto each group of features\n\nParameters\n----------\nXred : array-like of shape (n_samples, n_clusters) or (n_clusters,)\n The values to be assigned to each cluster of samples\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features) or (n_features,)\n A vector of size n_samples with the values of Xred assigned to\n each of the cluster of samples." - } - ], - "docstring": "A class for feature agglomeration via the transform interface" - } - ], - "functions": [] - }, - { - "name": "sklearn.cluster._kmeans", - "imports": [ - "import warnings", - "import numpy as np", - "import scipy.sparse as sp", - "from threadpoolctl import threadpool_limits", - "from threadpoolctl import threadpool_info", - "from base import BaseEstimator", - "from base import ClusterMixin", - "from base import TransformerMixin", - "from metrics.pairwise import euclidean_distances", - "from utils.extmath import row_norms", - "from utils.extmath import stable_cumsum", - "from utils.sparsefuncs_fast import assign_rows_csr", - "from utils.sparsefuncs import mean_variance_axis", - "from utils.validation import _deprecate_positional_args", - "from utils import check_array", - "from utils import gen_batches", - "from utils import check_random_state", - "from utils import deprecated", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils._openmp_helpers import _openmp_effective_n_threads", - "from exceptions import ConvergenceWarning", - "from _k_means_fast import CHUNK_SIZE", - "from 
_k_means_fast import _inertia_dense", - "from _k_means_fast import _inertia_sparse", - "from _k_means_fast import _mini_batch_update_csr", - "from _k_means_lloyd import lloyd_iter_chunked_dense", - "from _k_means_lloyd import lloyd_iter_chunked_sparse", - "from _k_means_elkan import init_bounds_dense", - "from _k_means_elkan import init_bounds_sparse", - "from _k_means_elkan import elkan_iter_chunked_dense", - "from _k_means_elkan import elkan_iter_chunked_sparse" - ], - "classes": [ - { - "name": "KMeans", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "8", - "limitation": null, - "ignored": false, - "docstring": "The number of clusters to form as well as the number of centroids to generate." - }, - { - "name": "init", - "type": "Literal['k-means++', 'random']", - "hasDefault": true, - "default": "'k-means++'", - "limitation": null, - "ignored": false, - "docstring": "Method for initialization: 'k-means++' : selects initial cluster centers for k-mean clustering in a smart way to speed up convergence. See section Notes in k_init for more details. 'random': choose `n_clusters` observations (rows) at random from data for the initial centroids. If an array is passed, it should be of shape (n_clusters, n_features) and gives the initial centers. If a callable is passed, it should take arguments X, n_clusters and a random state and return an initialization." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations of the k-means algorithm for a single run." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance with regards to Frobenius norm of the difference in the cluster centers of two consecutive iterations to declare convergence." - }, - { - "name": "precompute_distances", - "type": "Literal['auto', True, False]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Precompute distances (faster but takes more memory). 'auto' : do not precompute distances if n_samples * n_clusters > 12 million. This corresponds to about 100MB overhead per job using double precision. True : always precompute distances. False : never precompute distances. .. deprecated:: 0.23 'precompute_distances' was deprecated in version 0.22 and will be removed in 1.0 (renaming of 0.25). It has no effect." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity mode." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for centroid initialization. Use an int to make the randomness deterministic. See :term:`Glossary `." - }, - { - "name": "copy_x", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "When pre-computing distances it is more numerically accurate to center the data first. If copy_x is True (default), then the original data is not modified. 
If False, the original data is modified, and put back before the function returns, but small numerical differences may be introduced by subtracting and then adding the data mean. Note that if the original data is not C-contiguous, a copy will be made even if copy_x is False. If the original data is sparse, but not in CSR format, a copy will be made even if copy_x is False." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of OpenMP threads to use for the computation. Parallelism is sample-wise on the main cython loop which assigns each sample to its closest center. ``None`` or ``-1`` means using all processors. .. deprecated:: 0.23 ``n_jobs`` was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25)." - }, - { - "name": "algorithm", - "type": "Literal[\"auto\", \"full\", \"elkan\"]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "K-means algorithm to use. The classical EM-style algorithm is \"full\". The \"elkan\" variation is more efficient on data with well-defined clusters, by using the triangle inequality. However it's more memory intensive due to the allocation of an extra array of shape (n_samples, n_clusters). For now \"auto\" (kept for backward compatibiliy) chooses \"elkan\" but it might change in the future for a better heuristic. .. versionchanged:: 0.18 Added Elkan algorithm" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_center_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if centers is compatible with X and n_clusters." 
- }, - { - "name": "_check_test_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_mkl_vcomp", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Warns when vcomp and mkl are both present" - }, - { - "name": "_init_centroids", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - }, - { - "name": "x_squared_norms", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Squared euclidean norm of each data point. Pass it if you have it at hands already to avoid it being recomputed here." - }, - { - "name": "init", - "type": "Literal['k-means++', 'random']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Method for initialization." - }, - { - "name": "random_state", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for centroid initialization. See :term:`Glossary `." - }, - { - "name": "init_size", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to randomly sample for speeding up the initialization (sometimes at the expense of accuracy)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the initial centroids.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nx_squared_norms : ndarray of shape (n_samples,)\n Squared euclidean norm of each data point. 
Pass it if you have it\n at hands already to avoid it being recomputed here.\n\ninit : {'k-means++', 'random'}, callable or ndarray of shape (n_clusters, n_features)\n Method for initialization.\n\nrandom_state : RandomState instance\n Determines random number generation for centroid initialization.\n See :term:`Glossary `.\n\ninit_size : int, default=None\n Number of samples to randomly sample for speeding up the\n initialization (sometimes at the expense of accuracy).\n\nReturns\n-------\ncenters : ndarray of shape (n_clusters, n_features)" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster. It must be noted that the data will be converted to C ordering, which will cause a memory copy if the given data is not C-contiguous. If a sparse matrix is passed, a copy will be made if it's not in CSR format." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute k-means clustering.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training instances to cluster. 
It must be noted that the data\n will be converted to C ordering, which will cause a memory\n copy if the given data is not C-contiguous.\n If a sparse matrix is passed, a copy will be made if it's not in\n CSR format.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\n .. versionadded:: 0.20\n\nReturns\n-------\nself\n Fitted estimator." - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data to transform." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute cluster centers and predict cluster index for each sample.\n\nConvenience method; equivalent to calling fit(X) followed by\npredict(X).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to." 
- }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data to transform." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute clustering and transform X to cluster-distance space.\n\nEquivalent to fit(X).transform(X), but more efficiently implemented.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_clusters)\n X transformed in the new space." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data to transform." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X to a cluster-distance space.\n\nIn the new space, each dimension is the distance to the cluster\ncenters. 
Note that even if X is sparse, the array returned by\n`transform` will typically be dense.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_clusters)\n X transformed in the new space." - }, - { - "name": "_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Guts of transform method; no input validation." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data to predict." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the closest cluster each sample in X belongs to.\n\nIn the vector quantization literature, `cluster_centers_` is called\nthe code book and each value returned by `predict` is the index of\nthe closest code in the code book.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to." - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Opposite of the value of X on the K-means objective.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\nReturns\n-------\nscore : float\n Opposite of the value of X on the K-means objective." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "K-Means clustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nn_clusters : int, default=8\n The number of clusters to form as well as the number of\n centroids to generate.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features), default='k-means++'\n Method for initialization:\n\n 'k-means++' : selects initial cluster centers for k-mean\n clustering in a smart way to speed up convergence. 
See section\n Notes in k_init for more details.\n\n 'random': choose `n_clusters` observations (rows) at random from data\n for the initial centroids.\n\n If an array is passed, it should be of shape (n_clusters, n_features)\n and gives the initial centers.\n\n If a callable is passed, it should take arguments X, n_clusters and a\n random state and return an initialization.\n\nn_init : int, default=10\n Number of time the k-means algorithm will be run with different\n centroid seeds. The final results will be the best output of\n n_init consecutive runs in terms of inertia.\n\nmax_iter : int, default=300\n Maximum number of iterations of the k-means algorithm for a\n single run.\n\ntol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n\nprecompute_distances : {'auto', True, False}, default='auto'\n Precompute distances (faster but takes more memory).\n\n 'auto' : do not precompute distances if n_samples * n_clusters > 12\n million. This corresponds to about 100MB overhead per job using\n double precision.\n\n True : always precompute distances.\n\n False : never precompute distances.\n\n .. deprecated:: 0.23\n 'precompute_distances' was deprecated in version 0.22 and will be\n removed in 1.0 (renaming of 0.25). It has no effect.\n\nverbose : int, default=0\n Verbosity mode.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization. Use\n an int to make the randomness deterministic.\n See :term:`Glossary `.\n\ncopy_x : bool, default=True\n When pre-computing distances it is more numerically accurate to center\n the data first. If copy_x is True (default), then the original data is\n not modified. 
If False, the original data is modified, and put back\n before the function returns, but small numerical differences may be\n introduced by subtracting and then adding the data mean. Note that if\n the original data is not C-contiguous, a copy will be made even if\n copy_x is False. If the original data is sparse, but not in CSR format,\n a copy will be made even if copy_x is False.\n\nn_jobs : int, default=None\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\n ``None`` or ``-1`` means using all processors.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\nalgorithm : {\"auto\", \"full\", \"elkan\"}, default=\"auto\"\n K-means algorithm to use. The classical EM-style algorithm is \"full\".\n The \"elkan\" variation is more efficient on data with well-defined\n clusters, by using the triangle inequality. However it's more memory\n intensive due to the allocation of an extra array of shape\n (n_samples, n_clusters).\n\n For now \"auto\" (kept for backward compatibiliy) chooses \"elkan\" but it\n might change in the future for a better heuristic.\n\n .. versionchanged:: 0.18\n Added Elkan algorithm\n\nAttributes\n----------\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n Coordinates of cluster centers. 
If the algorithm stops before fully\n converging (see ``tol`` and ``max_iter``), these will not be\n consistent with ``labels_``.\n\nlabels_ : ndarray of shape (n_samples,)\n Labels of each point\n\ninertia_ : float\n Sum of squared distances of samples to their closest cluster center.\n\nn_iter_ : int\n Number of iterations run.\n\nSee Also\n--------\nMiniBatchKMeans : Alternative online implementation that does incremental\n updates of the centers positions using mini-batches.\n For large scale learning (say n_samples > 10k) MiniBatchKMeans is\n probably much faster than the default batch implementation.\n\nNotes\n-----\nThe k-means problem is solved using either Lloyd's or Elkan's algorithm.\n\nThe average complexity is given by O(k n T), were n is the number of\nsamples and T is the number of iteration.\n\nThe worst case complexity is given by O(n^(k+2/p)) with\nn = n_samples, p = n_features. (D. Arthur and S. Vassilvitskii,\n'How slow is the k-means method?' SoCG2006)\n\nIn practice, the k-means algorithm is very fast (one of the fastest\nclustering algorithms available), but it falls in local minima. That's why\nit can be useful to restart it several times.\n\nIf the algorithm stops before fully converging (because of ``tol`` or\n``max_iter``), ``labels_`` and ``cluster_centers_`` will not be consistent,\ni.e. the ``cluster_centers_`` will not be the means of the points in each\ncluster. Also, the estimator will reassign ``labels_`` after the last\niteration to make ``labels_`` consistent with ``predict`` on the training\nset.\n\nExamples\n--------\n\n>>> from sklearn.cluster import KMeans\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... 
[10, 2], [10, 4], [10, 0]])\n>>> kmeans = KMeans(n_clusters=2, random_state=0).fit(X)\n>>> kmeans.labels_\narray([1, 1, 1, 0, 0, 0], dtype=int32)\n>>> kmeans.predict([[0, 0], [12, 3]])\narray([1, 0], dtype=int32)\n>>> kmeans.cluster_centers_\narray([[10., 2.],\n [ 1., 2.]])" - }, - { - "name": "MiniBatchKMeans", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "8", - "limitation": null, - "ignored": false, - "docstring": "The number of clusters to form as well as the number of centroids to generate." - }, - { - "name": "init", - "type": "Literal['k-means++', 'random']", - "hasDefault": true, - "default": "'k-means++'", - "limitation": null, - "ignored": false, - "docstring": "Method for initialization: 'k-means++' : selects initial cluster centers for k-mean clustering in a smart way to speed up convergence. See section Notes in k_init for more details. 'random': choose `n_clusters` observations (rows) at random from data for the initial centroids. If an array is passed, it should be of shape (n_clusters, n_features) and gives the initial centers. If a callable is passed, it should take arguments X, n_clusters and a random state and return an initialization." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations over the complete dataset before stopping independently of any early stopping criterion heuristics." - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Size of the mini batches." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity mode." 
- }, - { - "name": "compute_labels", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Compute label assignment and inertia for the complete dataset once the minibatch optimization has converged in fit." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for centroid initialization and random reassignment. Use an int to make the randomness deterministic. See :term:`Glossary `." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Control early stopping based on the relative center changes as measured by a smoothed, variance-normalized of the mean center squared position changes. This early stopping heuristics is closer to the one used for the batch variant of the algorithms but induces a slight computational and memory overhead over the inertia heuristic. To disable convergence detection based on normalized center change, set tol to 0.0 (default)." - }, - { - "name": "max_no_improvement", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Control early stopping based on the consecutive number of mini batches that does not yield an improvement on the smoothed inertia. To disable convergence detection based on inertia, set max_no_improvement to None." - }, - { - "name": "init_size", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to randomly sample for speeding up the initialization (sometimes at the expense of accuracy): the only algorithm is initialized by running a batch KMeans on a random subset of the data. This needs to be larger than n_clusters. If `None`, `init_size= 3 * batch_size`." 
- }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of random initializations that are tried. In contrast to KMeans, the algorithm is only run once, using the best of the ``n_init`` initializations as measured by inertia." - }, - { - "name": "reassignment_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Control the fraction of the maximum number of counts for a center to be reassigned. A higher value means that low count centers are more easily reassigned, which means that the model will take longer to converge, but should converge in a better clustering." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "counts_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "init_size_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "random_state_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster. It must be noted that the data will be converted to C ordering, which will cause a memory copy if the given data is not C-contiguous." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight (default: None). .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the centroids on X by chunking it into mini-batches.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training instances to cluster. It must be noted that the data\n will be converted to C ordering, which will cause a memory copy\n if the given data is not C-contiguous.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight (default: None).\n\n .. versionadded:: 0.20\n\nReturns\n-------\nself" - }, - { - "name": "_labels_inertia_minibatch", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute labels and inertia using mini batches.\n\nThis is slightly slower than doing everything at once but prevents\nmemory errors / segfaults.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nsample_weight : array-like of shape (n_samples,)\n The weights for each observation in X.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels for each point.\n\ninertia : float\n Sum of squared distances of points to nearest cluster." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Coordinates of the data points to cluster. It must be noted that X will be copied if it is not C-contiguous." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight (default: None)." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Update k means estimate on a single mini-batch X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Coordinates of the data points to cluster. It must be noted that\n X will be copied if it is not C-contiguous.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. 
If None, all observations\n are assigned equal weight (default: None).\n\nReturns\n-------\nself" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data to predict." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight (default: None)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the closest cluster each sample in X belongs to.\n\nIn the vector quantization literature, `cluster_centers_` is called\nthe code book and each value returned by `predict` is the index of\nthe closest code in the code book.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight (default: None).\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mini-Batch K-Means clustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nn_clusters : int, default=8\n The number of clusters to form as well as the number of\n centroids to generate.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features), default='k-means++'\n Method for initialization:\n\n 'k-means++' : selects initial cluster centers for k-mean\n clustering in a smart way to speed up convergence. 
See section\n Notes in k_init for more details.\n\n 'random': choose `n_clusters` observations (rows) at random from data\n for the initial centroids.\n\n If an array is passed, it should be of shape (n_clusters, n_features)\n and gives the initial centers.\n\n If a callable is passed, it should take arguments X, n_clusters and a\n random state and return an initialization.\n\nmax_iter : int, default=100\n Maximum number of iterations over the complete dataset before\n stopping independently of any early stopping criterion heuristics.\n\nbatch_size : int, default=100\n Size of the mini batches.\n\nverbose : int, default=0\n Verbosity mode.\n\ncompute_labels : bool, default=True\n Compute label assignment and inertia for the complete dataset\n once the minibatch optimization has converged in fit.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization and\n random reassignment. Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\ntol : float, default=0.0\n Control early stopping based on the relative center changes as\n measured by a smoothed, variance-normalized of the mean center\n squared position changes. 
This early stopping heuristics is\n closer to the one used for the batch variant of the algorithms\n but induces a slight computational and memory overhead over the\n inertia heuristic.\n\n To disable convergence detection based on normalized center\n change, set tol to 0.0 (default).\n\nmax_no_improvement : int, default=10\n Control early stopping based on the consecutive number of mini\n batches that does not yield an improvement on the smoothed inertia.\n\n To disable convergence detection based on inertia, set\n max_no_improvement to None.\n\ninit_size : int, default=None\n Number of samples to randomly sample for speeding up the\n initialization (sometimes at the expense of accuracy): the\n only algorithm is initialized by running a batch KMeans on a\n random subset of the data. This needs to be larger than n_clusters.\n\n If `None`, `init_size= 3 * batch_size`.\n\nn_init : int, default=3\n Number of random initializations that are tried.\n In contrast to KMeans, the algorithm is only run once, using the\n best of the ``n_init`` initializations as measured by inertia.\n\nreassignment_ratio : float, default=0.01\n Control the fraction of the maximum number of counts for a\n center to be reassigned. A higher value means that low count\n centers are more easily reassigned, which means that the\n model will take longer to converge, but should converge in a\n better clustering.\n\nAttributes\n----------\n\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n Coordinates of cluster centers.\n\nlabels_ : int\n Labels of each point (if compute_labels is set to True).\n\ninertia_ : float\n The value of the inertia criterion associated with the chosen\n partition (if compute_labels is set to True). The inertia is\n defined as the sum of square distances of samples to their nearest\n neighbor.\n\nn_iter_ : int\n Number of batches processed.\n\ncounts_ : ndarray of shape (n_clusters,)\n Weigth sum of each cluster.\n\n .. 
deprecated:: 0.24\n This attribute is deprecated in 0.24 and will be removed in\n 1.1 (renaming of 0.26).\n\ninit_size_ : int\n The effective number of samples used for the initialization.\n\n .. deprecated:: 0.24\n This attribute is deprecated in 0.24 and will be removed in\n 1.1 (renaming of 0.26).\n\nSee Also\n--------\nKMeans : The classic implementation of the clustering method based on the\n Lloyd's algorithm. It consumes the whole set of input data at each\n iteration.\n\nNotes\n-----\nSee https://www.eecs.tufts.edu/~dsculley/papers/fastkmeans.pdf\n\nExamples\n--------\n>>> from sklearn.cluster import MiniBatchKMeans\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... [4, 2], [4, 0], [4, 4],\n... [4, 5], [0, 1], [2, 2],\n... [3, 2], [5, 5], [1, -1]])\n>>> # manually fit on batches\n>>> kmeans = MiniBatchKMeans(n_clusters=2,\n... random_state=0,\n... batch_size=6)\n>>> kmeans = kmeans.partial_fit(X[0:6,:])\n>>> kmeans = kmeans.partial_fit(X[6:12,:])\n>>> kmeans.cluster_centers_\narray([[2. , 1. ],\n [3.5, 4.5]])\n>>> kmeans.predict([[0, 0], [4, 4]])\narray([0, 1], dtype=int32)\n>>> # fit on the whole data\n>>> kmeans = MiniBatchKMeans(n_clusters=2,\n... random_state=0,\n... batch_size=6,\n... max_iter=10).fit(X)\n>>> kmeans.cluster_centers_\narray([[3.95918367, 2.40816327],\n [1.12195122, 1.3902439 ]])\n>>> kmeans.predict([[0, 0], [4, 4]])\narray([1, 0], dtype=int32)" - } - ], - "functions": [ - { - "name": "_kmeans_plusplus", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to pick seeds for." - }, - { - "name": "n_clusters", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of seeds to choose." 
- }, - { - "name": "x_squared_norms", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Squared Euclidean norm of each data point." - }, - { - "name": "random_state", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The generator used to initialize the centers. See :term:`Glossary `." - }, - { - "name": "n_local_trials", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of seeding trials for each center (except the first), of which the one reducing inertia the most is greedily chosen. Set to None to make the number of trials depend logarithmically on the number of seeds (2+log(k)); this is the default." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computational component for initialization of n_clusters by\nk-means++. Prior validation of data is assumed.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The data to pick seeds for.\n\nn_clusters : int\n The number of seeds to choose.\n\nx_squared_norms : ndarray of shape (n_samples,)\n Squared Euclidean norm of each data point.\n\nrandom_state : RandomState instance\n The generator used to initialize the centers.\n See :term:`Glossary `.\n\nn_local_trials : int, default=None\n The number of seeding trials for each center (except the first),\n of which the one reducing inertia the most is greedily chosen.\n Set to None to make the number of trials depend logarithmically\n on the number of seeds (2+log(k)); this is the default.\n\nReturns\n-------\ncenters : ndarray of shape (n_clusters, n_features)\n The inital centers for k-means.\n\nindices : ndarray of shape (n_clusters,)\n The index location of the chosen centers in the data array X. For a\n given index and center, X[index] = center." 
- }, - { - "name": "_tolerance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return a tolerance which is independent of the dataset." - }, - { - "name": "k_means", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The observations to cluster. It must be noted that the data will be converted to C ordering, which will cause a memory copy if the given data is not C-contiguous." - }, - { - "name": "n_clusters", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of clusters to form as well as the number of centroids to generate." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight." - }, - { - "name": "init", - "type": "Literal['k-means++', 'random']", - "hasDefault": true, - "default": "'k-means++'", - "limitation": null, - "ignored": false, - "docstring": "Method for initialization: 'k-means++' : selects initial cluster centers for k-mean clustering in a smart way to speed up convergence. See section Notes in k_init for more details. 'random': choose `n_clusters` observations (rows) at random from data for the initial centroids. If an array is passed, it should be of shape (n_clusters, n_features) and gives the initial centers. If a callable is passed, it should take arguments X, n_clusters and a random state and return an initialization." - }, - { - "name": "precompute_distances", - "type": "Literal['auto', True, False]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precompute distances (faster but takes more memory). 
'auto' : do not precompute distances if n_samples * n_clusters > 12 million. This corresponds to about 100MB overhead per job using double precision. True : always precompute distances False : never precompute distances .. deprecated:: 0.23 'precompute_distances' was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25). It has no effect." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations of the k-means algorithm to run." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbosity mode." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance with regards to Frobenius norm of the difference in the cluster centers of two consecutive iterations to declare convergence." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for centroid initialization. Use an int to make the randomness deterministic. See :term:`Glossary `." - }, - { - "name": "copy_x", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "When pre-computing distances it is more numerically accurate to center the data first. If copy_x is True (default), then the original data is not modified. 
If False, the original data is modified, and put back before the function returns, but small numerical differences may be introduced by subtracting and then adding the data mean. Note that if the original data is not C-contiguous, a copy will be made even if copy_x is False. If the original data is sparse, but not in CSR format, a copy will be made even if copy_x is False." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of OpenMP threads to use for the computation. Parallelism is sample-wise on the main cython loop which assigns each sample to its closest center. ``None`` or ``-1`` means using all processors. .. deprecated:: 0.23 ``n_jobs`` was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25)." - }, - { - "name": "algorithm", - "type": "Literal[\"auto\", \"full\", \"elkan\"]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "K-means algorithm to use. The classical EM-style algorithm is \"full\". The \"elkan\" variation is more efficient on data with well-defined clusters, by using the triangle inequality. However it's more memory intensive due to the allocation of an extra array of shape (n_samples, n_clusters). For now \"auto\" (kept for backward compatibiliy) chooses \"elkan\" but it might change in the future for a better heuristic." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "K-means clustering algorithm.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The observations to cluster. 
It must be noted that the data\n will be converted to C ordering, which will cause a memory copy\n if the given data is not C-contiguous.\n\nn_clusters : int\n The number of clusters to form as well as the number of\n centroids to generate.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features), default='k-means++'\n Method for initialization:\n\n 'k-means++' : selects initial cluster centers for k-mean\n clustering in a smart way to speed up convergence. See section\n Notes in k_init for more details.\n\n 'random': choose `n_clusters` observations (rows) at random from data\n for the initial centroids.\n\n If an array is passed, it should be of shape (n_clusters, n_features)\n and gives the initial centers.\n\n If a callable is passed, it should take arguments X, n_clusters and a\n random state and return an initialization.\n\nprecompute_distances : {'auto', True, False}\n Precompute distances (faster but takes more memory).\n\n 'auto' : do not precompute distances if n_samples * n_clusters > 12\n million. This corresponds to about 100MB overhead per job using\n double precision.\n\n True : always precompute distances\n\n False : never precompute distances\n\n .. deprecated:: 0.23\n 'precompute_distances' was deprecated in version 0.23 and will be\n removed in 1.0 (renaming of 0.25). It has no effect.\n\nn_init : int, default=10\n Number of time the k-means algorithm will be run with different\n centroid seeds. 
The final results will be the best output of\n n_init consecutive runs in terms of inertia.\n\nmax_iter : int, default=300\n Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n Verbosity mode.\n\ntol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization. Use\n an int to make the randomness deterministic.\n See :term:`Glossary `.\n\ncopy_x : bool, default=True\n When pre-computing distances it is more numerically accurate to center\n the data first. If copy_x is True (default), then the original data is\n not modified. If False, the original data is modified, and put back\n before the function returns, but small numerical differences may be\n introduced by subtracting and then adding the data mean. Note that if\n the original data is not C-contiguous, a copy will be made even if\n copy_x is False. If the original data is sparse, but not in CSR format,\n a copy will be made even if copy_x is False.\n\nn_jobs : int, default=None\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\n ``None`` or ``-1`` means using all processors.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\nalgorithm : {\"auto\", \"full\", \"elkan\"}, default=\"auto\"\n K-means algorithm to use. The classical EM-style algorithm is \"full\".\n The \"elkan\" variation is more efficient on data with well-defined\n clusters, by using the triangle inequality. 
However it's more memory\n intensive due to the allocation of an extra array of shape\n (n_samples, n_clusters).\n\n For now \"auto\" (kept for backward compatibiliy) chooses \"elkan\" but it\n might change in the future for a better heuristic.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n label[i] is the code or index of the centroid the\n i'th observation is closest to.\n\ninertia : float\n The final value of the inertia criterion (sum of squared distances to\n the closest centroid for all observations in the training set).\n\nbest_n_iter : int\n Number of iterations corresponding to the best results.\n Returned only if `return_n_iter` is set to True." - }, - { - "name": "_kmeans_single_elkan", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The observations to cluster. If sparse matrix, must be in CSR format." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X." - }, - { - "name": "centers_init", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial centers." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations of the k-means algorithm to run." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbosity mode." 
- }, - { - "name": "x_squared_norms", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed x_squared_norms." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance with regards to Frobenius norm of the difference in the cluster centers of two consecutive iterations to declare convergence. It's not advised to set `tol=0` since convergence might never be declared due to rounding errors. Use a very small number instead." - }, - { - "name": "n_threads", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of OpenMP threads to use for the computation. Parallelism is sample-wise on the main cython loop which assigns each sample to its closest center." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "A single run of k-means elkan, assumes preparation completed prior.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The observations to cluster. If sparse matrix, must be in CSR format.\n\nsample_weight : array-like of shape (n_samples,)\n The weights for each observation in X.\n\ncenters_init : ndarray of shape (n_clusters, n_features)\n The initial centers.\n\nmax_iter : int, default=300\n Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n Verbosity mode.\n\nx_squared_norms : array-like, default=None\n Precomputed x_squared_norms.\n\ntol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n It's not advised to set `tol=0` since convergence might never be\n declared due to rounding errors. 
Use a very small number instead.\n\nn_threads : int, default=1\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n label[i] is the code or index of the centroid the\n i'th observation is closest to.\n\ninertia : float\n The final value of the inertia criterion (sum of squared distances to\n the closest centroid for all observations in the training set).\n\nn_iter : int\n Number of iterations run." - }, - { - "name": "_kmeans_single_lloyd", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The observations to cluster. If sparse matrix, must be in CSR format." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X." - }, - { - "name": "centers_init", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial centers." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations of the k-means algorithm to run." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbosity mode" - }, - { - "name": "x_squared_norms", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed x_squared_norms." 
- }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance with regards to Frobenius norm of the difference in the cluster centers of two consecutive iterations to declare convergence. It's not advised to set `tol=0` since convergence might never be declared due to rounding errors. Use a very small number instead." - }, - { - "name": "n_threads", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of OpenMP threads to use for the computation. Parallelism is sample-wise on the main cython loop which assigns each sample to its closest center." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "A single run of k-means lloyd, assumes preparation completed prior.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The observations to cluster. If sparse matrix, must be in CSR format.\n\nsample_weight : ndarray of shape (n_samples,)\n The weights for each observation in X.\n\ncenters_init : ndarray of shape (n_clusters, n_features)\n The initial centers.\n\nmax_iter : int, default=300\n Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n Verbosity mode\n\nx_squared_norms : ndarray of shape (n_samples,), default=None\n Precomputed x_squared_norms.\n\ntol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n It's not advised to set `tol=0` since convergence might never be\n declared due to rounding errors. Use a very small number instead.\n\nn_threads : int, default=1\n The number of OpenMP threads to use for the computation. 
Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n label[i] is the code or index of the centroid the\n i'th observation is closest to.\n\ninertia : float\n The final value of the inertia criterion (sum of squared distances to\n the closest centroid for all observations in the training set).\n\nn_iter : int\n Number of iterations run." - }, - { - "name": "_labels_inertia", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples to assign to the labels. If sparse matrix, must be in CSR format." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X." - }, - { - "name": "x_squared_norms", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed squared euclidean norm of each data point, to speed up computations." - }, - { - "name": "centers", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The cluster centers." - }, - { - "name": "n_threads", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of OpenMP threads to use for the computation. Parallelism is sample-wise on the main cython loop which assigns each sample to its closest center." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "E step of the K-means EM algorithm.\n\nCompute the labels and the inertia of the given samples and centers.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input samples to assign to the labels. If sparse matrix, must\n be in CSR format.\n\nsample_weight : ndarray of shape (n_samples,)\n The weights for each observation in X.\n\nx_squared_norms : ndarray of shape (n_samples,)\n Precomputed squared euclidean norm of each data point, to speed up\n computations.\n\ncenters : ndarray of shape (n_clusters, n_features)\n The cluster centers.\n\nn_threads : int, default=None\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n The resulting assignment.\n\ninertia : float\n Sum of squared distances of samples to their closest cluster center." - }, - { - "name": "_mini_batch_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The original data array." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X." - }, - { - "name": "x_squared_norms", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Squared euclidean norm of each data point." - }, - { - "name": "centers", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The cluster centers. 
This array is MODIFIED IN PLACE" - }, - { - "name": "old_center_buffer", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Copy of old centers for monitoring convergence." - }, - { - "name": "compute_squared_diff", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If set to False, the squared diff computation is skipped." - }, - { - "name": "distances", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, should be a pre-allocated array that will be used to store the distances of each sample to its closest center. May not be None when random_reassign is True." - }, - { - "name": "random_reassign", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, centers with very low counts are randomly reassigned to observations." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for centroid initialization and to pick new clusters amongst observations with uniform probability. Use an int to make the randomness deterministic. See :term:`Glossary `." - }, - { - "name": "reassignment_ratio", - "type": "float", - "hasDefault": true, - "default": ".", - "limitation": null, - "ignored": false, - "docstring": "Control the fraction of the maximum number of counts for a center to be reassigned. A higher value means that low count centers are more likely to be reassigned, which means that the model will take longer to converge, but should converge in a better clustering." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Incremental update of the centers for the Minibatch K-Means algorithm.\n\nParameters\n----------\n\nX : ndarray of shape (n_samples, n_features)\n The original data array.\n\nsample_weight : array-like of shape (n_samples,)\n The weights for each observation in X.\n\nx_squared_norms : ndarray of shape (n_samples,)\n Squared euclidean norm of each data point.\n\ncenters : ndarray of shape (k, n_features)\n The cluster centers. This array is MODIFIED IN PLACE\n\nold_center_buffer : int\n Copy of old centers for monitoring convergence.\n\ncompute_squared_diff : bool\n If set to False, the squared diff computation is skipped.\n\ndistances : ndarray of shape (n_samples,), dtype=float, default=None\n If not None, should be a pre-allocated array that will be used to store\n the distances of each sample to its closest center.\n May not be None when random_reassign is True.\n\nrandom_reassign : bool, default=False\n If True, centers with very low counts are randomly reassigned\n to observations.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization and to\n pick new clusters amongst observations with uniform probability. Use\n an int to make the randomness deterministic.\n See :term:`Glossary `.\n\nreassignment_ratio : float, default=.01\n Control the fraction of the maximum number of counts for a\n center to be reassigned. A higher value means that low count\n centers are more likely to be reassigned, which means that the\n model will take longer to converge, but should converge in a\n better clustering.\n\nverbose : bool, default=False\n Controls the verbosity.\n\nReturns\n-------\ninertia : float\n Sum of squared distances of samples to their closest cluster center.\n\nsquared_diff : ndarray of shape (n_clusters,)\n Squared distances between previous and updated cluster centers." 
- }, - { - "name": "_mini_batch_convergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper function to encapsulate the early stopping logic." - }, - { - "name": "kmeans_plusplus", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to pick seeds from." - }, - { - "name": "n_clusters", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of centroids to initialize" - }, - { - "name": "x_squared_norms", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Squared Euclidean norm of each data point." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for centroid initialization. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "n_local_trials", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of seeding trials for each center (except the first), of which the one reducing inertia the most is greedily chosen. Set to None to make the number of trials depend logarithmically on the number of seeds (2+log(k))." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Init n_clusters seeds according to k-means++\n\n.. 
versionadded:: 0.24\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to pick seeds from.\n\nn_clusters : int\n The number of centroids to initialize\n\nx_squared_norms : array-like of shape (n_samples,), default=None\n Squared Euclidean norm of each data point.\n\nrandom_state : int or RandomState instance, default=None\n Determines random number generation for centroid initialization. Pass\n an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_local_trials : int, default=None\n The number of seeding trials for each center (except the first),\n of which the one reducing inertia the most is greedily chosen.\n Set to None to make the number of trials depend logarithmically\n on the number of seeds (2+log(k)).\n\nReturns\n-------\ncenters : ndarray of shape (n_clusters, n_features)\n The inital centers for k-means.\n\nindices : ndarray of shape (n_clusters,)\n The index location of the chosen centers in the data array X. For a\n given index and center, X[index] = center.\n\nNotes\n-----\nSelects initial cluster centers for k-mean clustering in a smart way\nto speed up convergence. see: Arthur, D. and Vassilvitskii, S.\n\"k-means++: the advantages of careful seeding\". ACM-SIAM symposium\non Discrete algorithms. 2007\n\nExamples\n--------\n\n>>> from sklearn.cluster import kmeans_plusplus\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... 
[10, 2], [10, 4], [10, 0]])\n>>> centers, indices = kmeans_plusplus(X, n_clusters=2, random_state=0)\n>>> centers\narray([[10, 4],\n [ 1, 0]])\n>>> indices\narray([4, 2])" - } - ] - }, - { - "name": "sklearn.cluster._mean_shift", - "imports": [ - "import numpy as np", - "import warnings", - "from joblib import Parallel", - "from collections import defaultdict", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from utils import check_random_state", - "from utils import gen_batches", - "from utils import check_array", - "from base import BaseEstimator", - "from base import ClusterMixin", - "from neighbors import NearestNeighbors", - "from metrics.pairwise import pairwise_distances_argmin", - "from _config import config_context" - ], - "classes": [ - { - "name": "MeanShift", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "bandwidth", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Bandwidth used in the RBF kernel. If not given, the bandwidth is estimated using sklearn.cluster.estimate_bandwidth; see the documentation for that function for hints on scalability (see also the Notes, below)." - }, - { - "name": "seeds", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Seeds used to initialize kernels. If not set, the seeds are calculated by clustering.get_bin_seeds with bandwidth as the grid size and default values for other parameters." 
- }, - { - "name": "bin_seeding", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, initial kernel locations are not locations of all points, but rather the location of the discretized version of points, where points are binned onto a grid whose coarseness corresponds to the bandwidth. Setting this option to True will speed up the algorithm because fewer seeds will be initialized. The default value is False. Ignored if seeds argument is not None." - }, - { - "name": "min_bin_freq", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "To speed up the algorithm, accept only those bins with at least min_bin_freq points as seeds." - }, - { - "name": "cluster_all", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If true, then all points are clustered, even those orphans that are not within any kernel. Orphans are assigned to the nearest kernel. If false, then orphans are given cluster label -1." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This works by computing each of the n_init runs in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations, per seed point before the clustering operation terminates (for that seed point), if has not converged yet. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples to cluster." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform clustering.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Samples to cluster.\n\ny : Ignored" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data to predict." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the closest cluster each sample in X belongs to.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to." - } - ], - "docstring": "Mean shift clustering using a flat kernel.\n\nMean shift clustering aims to discover \"blobs\" in a smooth density of\nsamples. It is a centroid-based algorithm, which works by updating\ncandidates for centroids to be the mean of the points within a given\nregion. 
These candidates are then filtered in a post-processing stage to\neliminate near-duplicates to form the final set of centroids.\n\nSeeding is performed using a binning technique for scalability.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nbandwidth : float, default=None\n Bandwidth used in the RBF kernel.\n\n If not given, the bandwidth is estimated using\n sklearn.cluster.estimate_bandwidth; see the documentation for that\n function for hints on scalability (see also the Notes, below).\n\nseeds : array-like of shape (n_samples, n_features), default=None\n Seeds used to initialize kernels. If not set,\n the seeds are calculated by clustering.get_bin_seeds\n with bandwidth as the grid size and default values for\n other parameters.\n\nbin_seeding : bool, default=False\n If true, initial kernel locations are not locations of all\n points, but rather the location of the discretized version of\n points, where points are binned onto a grid whose coarseness\n corresponds to the bandwidth. Setting this option to True will speed\n up the algorithm because fewer seeds will be initialized.\n The default value is False.\n Ignored if seeds argument is not None.\n\nmin_bin_freq : int, default=1\n To speed up the algorithm, accept only those bins with at least\n min_bin_freq points as seeds.\n\ncluster_all : bool, default=True\n If true, then all points are clustered, even those orphans that are\n not within any kernel. Orphans are assigned to the nearest kernel.\n If false, then orphans are given cluster label -1.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by computing\n each of the n_init runs in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nmax_iter : int, default=300\n Maximum number of iterations, per seed point before the clustering\n operation terminates (for that seed point), if has not converged yet.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n Coordinates of cluster centers.\n\nlabels_ : ndarray of shape (n_samples,)\n Labels of each point.\n\nn_iter_ : int\n Maximum number of iterations performed on each seed.\n\n .. versionadded:: 0.22\n\nExamples\n--------\n>>> from sklearn.cluster import MeanShift\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n... [4, 7], [3, 5], [3, 6]])\n>>> clustering = MeanShift(bandwidth=2).fit(X)\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])\n>>> clustering.predict([[0, 0], [5, 5]])\narray([1, 0])\n>>> clustering\nMeanShift(bandwidth=2)\n\nNotes\n-----\n\nScalability:\n\nBecause this implementation uses a flat kernel and\na Ball Tree to look up members of each kernel, the complexity will tend\ntowards O(T*n*log(n)) in lower dimensions, with n the number of samples\nand T the number of points. In higher dimensions the complexity will\ntend towards O(T*n^2).\n\nScalability can be boosted by using fewer seeds, for example by using\na higher value of min_bin_freq in the get_bin_seeds function.\n\nNote that the estimate_bandwidth function is much less scalable than the\nmean shift algorithm and will be the bottleneck if it is used.\n\nReferences\n----------\n\nDorin Comaniciu and Peter Meer, \"Mean Shift: A robust approach toward\nfeature space analysis\". IEEE Transactions on Pattern Analysis and\nMachine Intelligence. 2002. pp. 603-619." - } - ], - "functions": [ - { - "name": "estimate_bandwidth", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input points." 
- }, - { - "name": "quantile", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "should be between [0, 1] 0.5 means that the median of all pairwise distances is used." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of samples to use. If not given, all samples are used." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The generator used to randomly select the samples from input points for bandwidth estimation. Use an int to make the randomness deterministic. See :term:`Glossary `." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the bandwidth to use with the mean-shift algorithm.\n\nThat this function takes time at least quadratic in n_samples. For large\ndatasets, it's wise to set that parameter to a small value.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input points.\n\nquantile : float, default=0.3\n should be between [0, 1]\n 0.5 means that the median of all pairwise distances is used.\n\nn_samples : int, default=None\n The number of samples to use. If not given, all samples are used.\n\nrandom_state : int, RandomState instance, default=None\n The generator used to randomly select the samples from input points\n for bandwidth estimation. 
Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nReturns\n-------\nbandwidth : float\n The bandwidth parameter." - }, - { - "name": "_mean_shift_single_seed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "mean_shift", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "bandwidth", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Kernel bandwidth. If bandwidth is not given, it is determined using a heuristic based on the median of all pairwise distances. This will take quadratic time in the number of samples. The sklearn.cluster.estimate_bandwidth function can be used to do this more efficiently." - }, - { - "name": "seeds", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Point used as initial kernel locations. If None and bin_seeding=False, each data point is used as a seed. If None and bin_seeding=True, see bin_seeding." - }, - { - "name": "bin_seeding", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, initial kernel locations are not locations of all points, but rather the location of the discretized version of points, where points are binned onto a grid whose coarseness corresponds to the bandwidth. Setting this option to True will speed up the algorithm because fewer seeds will be initialized. Ignored if seeds argument is not None." 
- }, - { - "name": "min_bin_freq", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "To speed up the algorithm, accept only those bins with at least min_bin_freq points as seeds." - }, - { - "name": "cluster_all", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If true, then all points are clustered, even those orphans that are not within any kernel. Orphans are assigned to the nearest kernel. If false, then orphans are given cluster label -1." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations, per seed point before the clustering operation terminates (for that seed point), if has not converged yet." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This works by computing each of the n_init runs in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionadded:: 0.17 Parallel Execution using *n_jobs*." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform mean shift clustering of data using a flat kernel.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nbandwidth : float, default=None\n Kernel bandwidth.\n\n If bandwidth is not given, it is determined using a heuristic based on\n the median of all pairwise distances. This will take quadratic time in\n the number of samples. 
The sklearn.cluster.estimate_bandwidth function\n can be used to do this more efficiently.\n\nseeds : array-like of shape (n_seeds, n_features) or None\n Point used as initial kernel locations. If None and bin_seeding=False,\n each data point is used as a seed. If None and bin_seeding=True,\n see bin_seeding.\n\nbin_seeding : bool, default=False\n If true, initial kernel locations are not locations of all\n points, but rather the location of the discretized version of\n points, where points are binned onto a grid whose coarseness\n corresponds to the bandwidth. Setting this option to True will speed\n up the algorithm because fewer seeds will be initialized.\n Ignored if seeds argument is not None.\n\nmin_bin_freq : int, default=1\n To speed up the algorithm, accept only those bins with at least\n min_bin_freq points as seeds.\n\ncluster_all : bool, default=True\n If true, then all points are clustered, even those orphans that are\n not within any kernel. Orphans are assigned to the nearest kernel.\n If false, then orphans are given cluster label -1.\n\nmax_iter : int, default=300\n Maximum number of iterations, per seed point before the clustering\n operation terminates (for that seed point), if has not converged yet.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by computing\n each of the n_init runs in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.17\n Parallel Execution using *n_jobs*.\n\nReturns\n-------\n\ncluster_centers : ndarray of shape (n_clusters, n_features)\n Coordinates of cluster centers.\n\nlabels : ndarray of shape (n_samples,)\n Cluster labels for each point.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_mean_shift.py\n`." 
- }, - { - "name": "get_bin_seeds", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input points, the same points that will be used in mean_shift." - }, - { - "name": "bin_size", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the coarseness of the binning. Smaller values lead to more seeding (which is computationally more expensive). If you're not sure how to set this, set it to the value of the bandwidth used in clustering.mean_shift." - }, - { - "name": "min_bin_freq", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Only bins with at least min_bin_freq will be selected as seeds. Raising this value decreases the number of seeds found, which makes mean_shift computationally cheaper." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Finds seeds for mean_shift.\n\nFinds seeds by first binning data onto a grid whose lines are\nspaced bin_size apart, and then choosing those bins with at least\nmin_bin_freq points.\n\nParameters\n----------\n\nX : array-like of shape (n_samples, n_features)\n Input points, the same points that will be used in mean_shift.\n\nbin_size : float\n Controls the coarseness of the binning. Smaller values lead\n to more seeding (which is computationally more expensive). If you're\n not sure how to set this, set it to the value of the bandwidth used\n in clustering.mean_shift.\n\nmin_bin_freq : int, default=1\n Only bins with at least min_bin_freq will be selected as seeds.\n Raising this value decreases the number of seeds found, which\n makes mean_shift computationally cheaper.\n\nReturns\n-------\nbin_seeds : array-like of shape (n_samples, n_features)\n Points used as initial kernel positions in clustering.mean_shift." 
- } - ] - }, - { - "name": "sklearn.cluster._optics", - "imports": [ - "import warnings", - "import numpy as np", - "from utils import gen_batches", - "from utils import get_chunk_n_rows", - "from utils.validation import _deprecate_positional_args", - "from neighbors import NearestNeighbors", - "from base import BaseEstimator", - "from base import ClusterMixin", - "from metrics import pairwise_distances" - ], - "classes": [ - { - "name": "OPTICS", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "min_samples", - "type": null, - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "The number of samples in a neighborhood for a point to be considered as a core point. Also, up and down steep regions can't have more than ``min_samples`` consecutive non-steep points. Expressed as an absolute number or a fraction of the number of samples (rounded to be at least 2)." - }, - { - "name": "max_eps", - "type": "float", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "The maximum distance between two samples for one to be considered as in the neighborhood of the other. Default value of ``np.inf`` will identify clusters across all scales; reducing ``max_eps`` will result in shorter run times." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "Metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. If metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays as input and return one value indicating the distance between them. This works for Scipy's metrics, but is less efficient than passing the metric name as a string. 
If metric is \"precomputed\", X is assumed to be a distance matrix and must be square. Valid values for metric are: - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'] - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'] See the documentation for scipy.spatial.distance for details on these metrics." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Parameter for the Minkowski metric from :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "cluster_method", - "type": "str", - "hasDefault": true, - "default": "'xi'", - "limitation": null, - "ignored": false, - "docstring": "The extraction method used to extract clusters using the calculated reachability and ordering. Possible values are \"xi\" and \"dbscan\"." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum distance between two samples for one to be considered as in the neighborhood of the other. By default it assumes the same value as ``max_eps``. Used only when ``cluster_method='dbscan'``." 
- }, - { - "name": "xi", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Determines the minimum steepness on the reachability plot that constitutes a cluster boundary. For example, an upwards point in the reachability plot is defined by the ratio from one point to its successor being at most 1-xi. Used only when ``cluster_method='xi'``." - }, - { - "name": "predecessor_correction", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Correct clusters according to the predecessors calculated by OPTICS [2]_. This parameter has minimal effect on most datasets. Used only when ``cluster_method='xi'``." - }, - { - "name": "min_cluster_size", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Minimum number of samples in an OPTICS cluster, expressed as an absolute number or a fraction of the number of samples (rounded to be at least 2). If ``None``, the value of ``min_samples`` is used instead. Used only when ``cluster_method='xi'``." - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. (default) Note: fitting on sparse input will override the setting of this parameter, using brute force." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to :class:`BallTree` or :class:`KDTree`. 
This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A feature array, or array of distances between samples if metric='precomputed'." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Perform OPTICS clustering.\n\nExtracts an ordered list of points and reachability distances, and\nperforms initial clustering using ``max_eps`` distance specified at\nOPTICS object instantiation.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features), or (n_samples, n_samples) if metric=\u2019precomputed\u2019\n A feature array, or array of distances between samples if\n metric='precomputed'.\n\ny : ignored\n Ignored.\n\nReturns\n-------\nself : instance of OPTICS\n The instance." - } - ], - "docstring": "Estimate clustering structure from vector array.\n\nOPTICS (Ordering Points To Identify the Clustering Structure), closely\nrelated to DBSCAN, finds core sample of high density and expands clusters\nfrom them [1]_. Unlike DBSCAN, keeps cluster hierarchy for a variable\nneighborhood radius. 
Better suited for usage on large datasets than the\ncurrent sklearn implementation of DBSCAN.\n\nClusters are then extracted using a DBSCAN-like method\n(cluster_method = 'dbscan') or an automatic\ntechnique proposed in [1]_ (cluster_method = 'xi').\n\nThis implementation deviates from the original OPTICS by first performing\nk-nearest-neighborhood searches on all points to identify core sizes, then\ncomputing only the distances to unprocessed points when constructing the\ncluster order. Note that we do not employ a heap to manage the expansion\ncandidates, so the time complexity will be O(n^2).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nmin_samples : int > 1 or float between 0 and 1, default=5\n The number of samples in a neighborhood for a point to be considered as\n a core point. Also, up and down steep regions can't have more than\n ``min_samples`` consecutive non-steep points. Expressed as an absolute\n number or a fraction of the number of samples (rounded to be at least\n 2).\n\nmax_eps : float, default=np.inf\n The maximum distance between two samples for one to be considered as\n in the neighborhood of the other. Default value of ``np.inf`` will\n identify clusters across all scales; reducing ``max_eps`` will result\n in shorter run times.\n\nmetric : str or callable, default='minkowski'\n Metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string. 
If metric is\n \"precomputed\", X is assumed to be a distance matrix and must be square.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\np : int, default=2\n Parameter for the Minkowski metric from\n :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\ncluster_method : str, default='xi'\n The extraction method used to extract clusters using the calculated\n reachability and ordering. Possible values are \"xi\" and \"dbscan\".\n\neps : float, default=None\n The maximum distance between two samples for one to be considered as\n in the neighborhood of the other. By default it assumes the same value\n as ``max_eps``.\n Used only when ``cluster_method='dbscan'``.\n\nxi : float between 0 and 1, default=0.05\n Determines the minimum steepness on the reachability plot that\n constitutes a cluster boundary. For example, an upwards point in the\n reachability plot is defined by the ratio from one point to its\n successor being at most 1-xi.\n Used only when ``cluster_method='xi'``.\n\npredecessor_correction : bool, default=True\n Correct clusters according to the predecessors calculated by OPTICS\n [2]_. 
This parameter has minimal effect on most datasets.\n Used only when ``cluster_method='xi'``.\n\nmin_cluster_size : int > 1 or float between 0 and 1, default=None\n Minimum number of samples in an OPTICS cluster, expressed as an\n absolute number or a fraction of the number of samples (rounded to be\n at least 2). If ``None``, the value of ``min_samples`` is used instead.\n Used only when ``cluster_method='xi'``.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method. (default)\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n affect the speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nlabels_ : ndarray of shape (n_samples,)\n Cluster labels for each point in the dataset given to fit().\n Noisy samples and points which are not included in a leaf cluster\n of ``cluster_hierarchy_`` are labeled as -1.\n\nreachability_ : ndarray of shape (n_samples,)\n Reachability distances per sample, indexed by object order. 
Use\n ``clust.reachability_[clust.ordering_]`` to access in cluster order.\n\nordering_ : ndarray of shape (n_samples,)\n The cluster ordered list of sample indices.\n\ncore_distances_ : ndarray of shape (n_samples,)\n Distance at which each sample becomes a core point, indexed by object\n order. Points which will never be core have a distance of inf. Use\n ``clust.core_distances_[clust.ordering_]`` to access in cluster order.\n\npredecessor_ : ndarray of shape (n_samples,)\n Point that a sample was reached from, indexed by object order.\n Seed points have a predecessor of -1.\n\ncluster_hierarchy_ : ndarray of shape (n_clusters, 2)\n The list of clusters in the form of ``[start, end]`` in each row, with\n all indices inclusive. The clusters are ordered according to\n ``(end, -start)`` (ascending) so that larger clusters encompassing\n smaller clusters come after those smaller ones. Since ``labels_`` does\n not reflect the hierarchy, usually\n ``len(cluster_hierarchy_) > np.unique(optics.labels_)``. Please also\n note that these indices are of the ``ordering_``, i.e.\n ``X[ordering_][start:end + 1]`` form a cluster.\n Only available when ``cluster_method='xi'``.\n\nSee Also\n--------\nDBSCAN : A similar clustering for a specified neighborhood radius (eps).\n Our implementation is optimized for runtime.\n\nReferences\n----------\n.. [1] Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel,\n and J\u00f6rg Sander. \"OPTICS: ordering points to identify the clustering\n structure.\" ACM SIGMOD Record 28, no. 2 (1999): 49-60.\n\n.. [2] Schubert, Erich, Michael Gertz.\n \"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. of\n the Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329.\n\nExamples\n--------\n>>> from sklearn.cluster import OPTICS\n>>> import numpy as np\n>>> X = np.array([[1, 2], [2, 5], [3, 6],\n... 
[8, 7], [8, 8], [7, 3]])\n>>> clustering = OPTICS(min_samples=2).fit(X)\n>>> clustering.labels_\narray([0, 0, 0, 1, 1, 1])" - } - ], - "functions": [ - { - "name": "_validate_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_compute_core_distances_", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - }, - { - "name": "neighbors", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The fitted nearest neighbors estimator." - }, - { - "name": "working_memory", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sought maximum memory for temporary distance matrix chunks. When None (default), the value of ``sklearn.get_config()['working_memory']`` is used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the k-th nearest neighbor of each sample\n\nEquivalent to neighbors.kneighbors(X, self.min_samples)[0][:, -1]\nbut with more memory efficiency.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data.\nneighbors : NearestNeighbors instance\n The fitted nearest neighbors estimator.\nworking_memory : int, default=None\n The sought maximum memory for temporary distance matrix chunks.\n When None (default), the value of\n ``sklearn.get_config()['working_memory']`` is used.\n\nReturns\n-------\ncore_distances : ndarray of shape (n_samples,)\n Distance at which each sample becomes a core point.\n Points which will never be core have a distance of inf." 
- }, - { - "name": "compute_optics_graph", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A feature array, or array of distances between samples if metric='precomputed'" - }, - { - "name": "min_samples", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of samples in a neighborhood for a point to be considered as a core point. Expressed as an absolute number or a fraction of the number of samples (rounded to be at least 2)." - }, - { - "name": "max_eps", - "type": "float", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "The maximum distance between two samples for one to be considered as in the neighborhood of the other. Default value of ``np.inf`` will identify clusters across all scales; reducing ``max_eps`` will result in shorter run times." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "Metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. If metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays as input and return one value indicating the distance between them. This works for Scipy's metrics, but is less efficient than passing the metric name as a string. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square. 
Valid values for metric are: - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'] - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'] See the documentation for scipy.spatial.distance for details on these metrics." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Parameter for the Minkowski metric from :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. (default) Note: fitting on sparse input will override the setting of this parameter, using brute force." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." 
- }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the OPTICS reachability graph.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features), or (n_samples, n_samples) if metric=\u2019precomputed\u2019.\n A feature array, or array of distances between samples if\n metric='precomputed'\n\nmin_samples : int > 1 or float between 0 and 1\n The number of samples in a neighborhood for a point to be considered\n as a core point. Expressed as an absolute number or a fraction of the\n number of samples (rounded to be at least 2).\n\nmax_eps : float, default=np.inf\n The maximum distance between two samples for one to be considered as\n in the neighborhood of the other. Default value of ``np.inf`` will\n identify clusters across all scales; reducing ``max_eps`` will result\n in shorter run times.\n\nmetric : str or callable, default='minkowski'\n Metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string. 
If metric is\n \"precomputed\", X is assumed to be a distance matrix and must be square.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\np : int, default=2\n Parameter for the Minkowski metric from\n :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method. (default)\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n affect the speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nReturns\n-------\nordering_ : array of shape (n_samples,)\n The cluster ordered list of sample indices.\n\ncore_distances_ : array of shape (n_samples,)\n Distance at which each sample becomes a core point, indexed by object\n order. Points which will never be core have a distance of inf. Use\n ``clust.core_distances_[clust.ordering_]`` to access in cluster order.\n\nreachability_ : array of shape (n_samples,)\n Reachability distances per sample, indexed by object order. Use\n ``clust.reachability_[clust.ordering_]`` to access in cluster order.\n\npredecessor_ : array of shape (n_samples,)\n Point that a sample was reached from, indexed by object order.\n Seed points have a predecessor of -1.\n\nReferences\n----------\n.. [1] Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel,\n and J\u00f6rg Sander. \"OPTICS: ordering points to identify the clustering\n structure.\" ACM SIGMOD Record 28, no. 2 (1999): 49-60." - }, - { - "name": "_set_reach_dist", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "cluster_optics_dbscan", - "decorators": [], - "parameters": [ - { - "name": "reachability", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Reachability distances calculated by OPTICS (``reachability_``)" - }, - { - "name": "core_distances", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Distances at which points become core (``core_distances_``)" - }, - { - "name": "ordering", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "OPTICS ordered point indices (``ordering_``)" - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "DBSCAN ``eps`` 
parameter. Must be set to < ``max_eps``. Results will be close to DBSCAN algorithm if ``eps`` and ``max_eps`` are close to one another." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Performs DBSCAN extraction for an arbitrary epsilon.\n\nExtracting the clusters runs in linear time. Note that this results in\n``labels_`` which are close to a :class:`~sklearn.cluster.DBSCAN` with\nsimilar settings and ``eps``, only if ``eps`` is close to ``max_eps``.\n\nParameters\n----------\nreachability : array of shape (n_samples,)\n Reachability distances calculated by OPTICS (``reachability_``)\n\ncore_distances : array of shape (n_samples,)\n Distances at which points become core (``core_distances_``)\n\nordering : array of shape (n_samples,)\n OPTICS ordered point indices (``ordering_``)\n\neps : float\n DBSCAN ``eps`` parameter. Must be set to < ``max_eps``. Results\n will be close to DBSCAN algorithm if ``eps`` and ``max_eps`` are close\n to one another.\n\nReturns\n-------\nlabels_ : array of shape (n_samples,)\n The estimated labels." - }, - { - "name": "cluster_optics_xi", - "decorators": [], - "parameters": [ - { - "name": "reachability", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Reachability distances calculated by OPTICS (`reachability_`)" - }, - { - "name": "predecessor", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predecessors calculated by OPTICS." - }, - { - "name": "ordering", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "OPTICS ordered point indices (`ordering_`)" - }, - { - "name": "min_samples", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The same as the min_samples given to OPTICS. 
Up and down steep regions can't have more then ``min_samples`` consecutive non-steep points. Expressed as an absolute number or a fraction of the number of samples (rounded to be at least 2)." - }, - { - "name": "min_cluster_size", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Minimum number of samples in an OPTICS cluster, expressed as an absolute number or a fraction of the number of samples (rounded to be at least 2). If ``None``, the value of ``min_samples`` is used instead." - }, - { - "name": "xi", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Determines the minimum steepness on the reachability plot that constitutes a cluster boundary. For example, an upwards point in the reachability plot is defined by the ratio from one point to its successor being at most 1-xi." - }, - { - "name": "predecessor_correction", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Correct clusters based on the calculated predecessors." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Automatically extract clusters according to the Xi-steep method.\n\nParameters\n----------\nreachability : ndarray of shape (n_samples,)\n Reachability distances calculated by OPTICS (`reachability_`)\n\npredecessor : ndarray of shape (n_samples,)\n Predecessors calculated by OPTICS.\n\nordering : ndarray of shape (n_samples,)\n OPTICS ordered point indices (`ordering_`)\n\nmin_samples : int > 1 or float between 0 and 1\n The same as the min_samples given to OPTICS. 
Up and down steep regions\n can't have more then ``min_samples`` consecutive non-steep points.\n Expressed as an absolute number or a fraction of the number of samples\n (rounded to be at least 2).\n\nmin_cluster_size : int > 1 or float between 0 and 1, default=None\n Minimum number of samples in an OPTICS cluster, expressed as an\n absolute number or a fraction of the number of samples (rounded to be\n at least 2). If ``None``, the value of ``min_samples`` is used instead.\n\nxi : float between 0 and 1, default=0.05\n Determines the minimum steepness on the reachability plot that\n constitutes a cluster boundary. For example, an upwards point in the\n reachability plot is defined by the ratio from one point to its\n successor being at most 1-xi.\n\npredecessor_correction : bool, default=True\n Correct clusters based on the calculated predecessors.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n The labels assigned to samples. Points which are not included\n in any cluster are labeled as -1.\n\nclusters : ndarray of shape (n_clusters, 2)\n The list of clusters in the form of ``[start, end]`` in each row, with\n all indices inclusive. The clusters are ordered according to ``(end,\n -start)`` (ascending) so that larger clusters encompassing smaller\n clusters come after such nested smaller clusters. Since ``labels`` does\n not reflect the hierarchy, usually ``len(clusters) >\n np.unique(labels)``." - }, - { - "name": "_extend_region", - "decorators": [], - "parameters": [ - { - "name": "steep_point", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True if the point is steep downward (upward)." - }, - { - "name": "xward_point", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True if the point is an upward (respectively downward) point." 
- }, - { - "name": "start", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The start of the xward region." - }, - { - "name": "min_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The same as the min_samples given to OPTICS. Up and down steep regions can't have more then ``min_samples`` consecutive non-steep points." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Extend the area until it's maximal.\n\nIt's the same function for both upward and downward reagions, depending on\nthe given input parameters. Assuming:\n\n - steep_{upward/downward}: bool array indicating whether a point is a\n steep {upward/downward};\n - upward/downward: bool array indicating whether a point is\n upward/downward;\n\nTo extend an upward reagion, ``steep_point=steep_upward`` and\n``xward_point=downward`` are expected, and to extend a downward region,\n``steep_point=steep_downward`` and ``xward_point=upward``.\n\nParameters\n----------\nsteep_point : ndarray of shape (n_samples,), dtype=bool\n True if the point is steep downward (upward).\n\nxward_point : ndarray of shape (n_samples,), dtype=bool\n True if the point is an upward (respectively downward) point.\n\nstart : int\n The start of the xward region.\n\nmin_samples : int\n The same as the min_samples given to OPTICS. Up and down steep\n regions can't have more then ``min_samples`` consecutive non-steep\n points.\n\nReturns\n-------\nindex : int\n The current index iterating over all the samples, i.e. where we are up\n to in our search.\n\nend : int\n The end of the region, which can be behind the index. The region\n includes the ``end`` index." 
- }, - { - "name": "_update_filter_sdas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update steep down areas (SDAs) using the new maximum in between (mib)\nvalue, and the given complement of xi, i.e. ``1 - xi``." - }, - { - "name": "_correct_predecessor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Correct for predecessors.\n\nApplies Algorithm 2 of [1]_.\n\nInput parameters are ordered by the computer OPTICS ordering.\n\n.. [1] Schubert, Erich, Michael Gertz.\n \"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. of\n the Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329." - }, - { - "name": "_xi_cluster", - "decorators": [], - "parameters": [ - { - "name": "reachability_plot", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The reachability plot, i.e. reachability ordered according to the calculated ordering, all computed by OPTICS." - }, - { - "name": "predecessor_plot", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predecessors ordered according to the calculated ordering." - }, - { - "name": "xi", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the minimum steepness on the reachability plot that constitutes a cluster boundary. For example, an upwards point in the reachability plot is defined by the ratio from one point to its successor being at most 1-xi." - }, - { - "name": "min_samples", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The same as the min_samples given to OPTICS. Up and down steep regions can't have more then ``min_samples`` consecutive non-steep points." 
- }, - { - "name": "min_cluster_size", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Minimum number of samples in an OPTICS cluster." - }, - { - "name": "predecessor_correction", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Correct clusters based on the calculated predecessors." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Automatically extract clusters according to the Xi-steep method.\n\nThis is rouphly an implementation of Figure 19 of the OPTICS paper.\n\nParameters\n----------\nreachability_plot : array-like of shape (n_samples,)\n The reachability plot, i.e. reachability ordered according to\n the calculated ordering, all computed by OPTICS.\n\npredecessor_plot : array-like of shape (n_samples,)\n Predecessors ordered according to the calculated ordering.\n\nxi : float, between 0 and 1\n Determines the minimum steepness on the reachability plot that\n constitutes a cluster boundary. For example, an upwards point in the\n reachability plot is defined by the ratio from one point to its\n successor being at most 1-xi.\n\nmin_samples : int > 1\n The same as the min_samples given to OPTICS. Up and down steep regions\n can't have more then ``min_samples`` consecutive non-steep points.\n\nmin_cluster_size : int > 1\n Minimum number of samples in an OPTICS cluster.\n\npredecessor_correction : bool\n Correct clusters based on the calculated predecessors.\n\nReturns\n-------\nclusters : ndarray of shape (n_clusters, 2)\n The list of clusters in the form of [start, end] in each row, with all\n indices inclusive. The clusters are ordered in a way that larger\n clusters encompassing smaller clusters come after those smaller\n clusters." 
- }, - { - "name": "_extract_xi_labels", - "decorators": [], - "parameters": [ - { - "name": "ordering", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ordering of points calculated by OPTICS" - }, - { - "name": "clusters", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of clusters i.e. (start, end) tuples, as returned by `_xi_cluster`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Extracts the labels from the clusters returned by `_xi_cluster`.\nWe rely on the fact that clusters are stored\nwith the smaller clusters coming before the larger ones.\n\nParameters\n----------\nordering : array-like of shape (n_samples,)\n The ordering of points calculated by OPTICS\n\nclusters : array-like of shape (n_clusters, 2)\n List of clusters i.e. (start, end) tuples,\n as returned by `_xi_cluster`.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)" - } - ] - }, - { - "name": "sklearn.cluster._spectral", - "imports": [ - "import warnings", - "import numpy as np", - "from base import BaseEstimator", - "from base import ClusterMixin", - "from utils import check_random_state", - "from utils import as_float_array", - "from utils.validation import _deprecate_positional_args", - "from utils.deprecation import deprecated", - "from metrics.pairwise import pairwise_kernels", - "from neighbors import kneighbors_graph", - "from neighbors import NearestNeighbors", - "from manifold import spectral_embedding", - "from _kmeans import k_means", - "from scipy.sparse import csc_matrix", - "from scipy.linalg import LinAlgError" - ], - "classes": [ - { - "name": "SpectralClustering", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "8", - 
"limitation": null, - "ignored": false, - "docstring": "The dimension of the projection subspace." - }, - { - "name": "eigen_solver", - "type": "Literal['arpack', 'lobpcg', 'amg']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The eigenvalue decomposition strategy to use. AMG requires pyamg to be installed. It can be faster on very large, sparse problems, but may also lead to instabilities. If None, then ``'arpack'`` is used." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "n_clusters", - "limitation": null, - "ignored": false, - "docstring": "Number of eigen vectors to use for the spectral embedding" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A pseudo random number generator used for the initialization of the lobpcg eigen vectors decomposition when ``eigen_solver='amg'`` and by the K-Means initialization. Use an int to make the randomness deterministic. See :term:`Glossary `." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia." - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels. Ignored for ``affinity='nearest_neighbors'``." - }, - { - "name": "affinity", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "How to construct the affinity matrix. - 'nearest_neighbors' : construct the affinity matrix by computing a graph of nearest neighbors. 
- 'rbf' : construct the affinity matrix using a radial basis function (RBF) kernel. - 'precomputed' : interpret ``X`` as a precomputed affinity matrix. - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph of precomputed nearest neighbors, and constructs the affinity matrix by selecting the ``n_neighbors`` nearest neighbors. - one of the kernels supported by :func:`~sklearn.metrics.pairwise_kernels`. Only kernels that produce similarity scores (non-negative values that increase with similarity) should be used. This property is not checked by the clustering algorithm." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to use when constructing the affinity matrix using the nearest neighbors method. Ignored for ``affinity='rbf'``." - }, - { - "name": "eigen_tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion for eigendecomposition of the Laplacian matrix when ``eigen_solver='arpack'``." - }, - { - "name": "assign_labels", - "type": "Literal['kmeans', 'discretize']", - "hasDefault": true, - "default": "'kmeans'", - "limitation": null, - "ignored": false, - "docstring": "The strategy to use to assign labels in the embedding space. There are two ways to assign labels after the laplacian embedding. k-means can be applied and is a popular choice. But it can also be sensitive to initialization. Discretization is another approach which is less sensitive to random initialization." - }, - { - "name": "degree", - "type": "float", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel. Ignored by other kernels." 
- }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Zero coefficient for polynomial and sigmoid kernels. Ignored by other kernels." - }, - { - "name": "kernel_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters (keyword arguments) and values for kernel passed as callable object. Ignored by other kernels." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run when `affinity='nearest_neighbors'` or `affinity='precomputed_nearest_neighbors'`. The neighbors search will be done in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbosity mode. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster, or similarities / affinities between instances if ``affinity='precomputed'``. If a sparse matrix is provided in a format other than ``csr_matrix``, ``csc_matrix``, or ``coo_matrix``, it will be converted into a sparse ``csr_matrix``." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Perform spectral clustering from features, or affinity matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. If a sparse matrix is\n provided in a format other than ``csr_matrix``, ``csc_matrix``,\n or ``coo_matrix``, it will be converted into a sparse\n ``csr_matrix``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself" - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster, or similarities / affinities between instances if ``affinity='precomputed'``. If a sparse matrix is provided in a format other than ``csr_matrix``, ``csc_matrix``, or ``coo_matrix``, it will be converted into a sparse ``csr_matrix``." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform spectral clustering from features, or affinity matrix,\nand return cluster labels.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. 
If a sparse matrix is\n provided in a format other than ``csr_matrix``, ``csc_matrix``,\n or ``coo_matrix``, it will be converted into a sparse\n ``csr_matrix``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster. For instance when clusters are\nnested circles on the 2D plane.\n\nIf affinity is the adjacency matrix of a graph, this method can be\nused to find normalized graph cuts.\n\nWhen calling ``fit``, an affinity matrix is constructed using either\nkernel function such the Gaussian (aka RBF) kernel of the euclidean\ndistanced ``d(X, X)``::\n\n np.exp(-gamma * d(X,X) ** 2)\n\nor a k-nearest neighbors connectivity matrix.\n\nAlternatively, using ``precomputed``, a user-provided affinity\nmatrix can be used.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int, default=8\n The dimension of the projection subspace.\n\neigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems,\n but may also lead to instabilities. 
If None, then ``'arpack'`` is\n used.\n\nn_components : int, default=n_clusters\n Number of eigen vectors to use for the spectral embedding\n\nrandom_state : int, RandomState instance, default=None\n A pseudo random number generator used for the initialization of the\n lobpcg eigen vectors decomposition when ``eigen_solver='amg'`` and by\n the K-Means initialization. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\nn_init : int, default=10\n Number of time the k-means algorithm will be run with different\n centroid seeds. The final results will be the best output of\n n_init consecutive runs in terms of inertia.\n\ngamma : float, default=1.0\n Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels.\n Ignored for ``affinity='nearest_neighbors'``.\n\naffinity : str or callable, default='rbf'\n How to construct the affinity matrix.\n - 'nearest_neighbors' : construct the affinity matrix by computing a\n graph of nearest neighbors.\n - 'rbf' : construct the affinity matrix using a radial basis function\n (RBF) kernel.\n - 'precomputed' : interpret ``X`` as a precomputed affinity matrix.\n - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph\n of precomputed nearest neighbors, and constructs the affinity matrix\n by selecting the ``n_neighbors`` nearest neighbors.\n - one of the kernels supported by\n :func:`~sklearn.metrics.pairwise_kernels`.\n\n Only kernels that produce similarity scores (non-negative values that\n increase with similarity) should be used. This property is not checked\n by the clustering algorithm.\n\nn_neighbors : int, default=10\n Number of neighbors to use when constructing the affinity matrix using\n the nearest neighbors method. 
Ignored for ``affinity='rbf'``.\n\neigen_tol : float, default=0.0\n Stopping criterion for eigendecomposition of the Laplacian matrix\n when ``eigen_solver='arpack'``.\n\nassign_labels : {'kmeans', 'discretize'}, default='kmeans'\n The strategy to use to assign labels in the embedding\n space. There are two ways to assign labels after the laplacian\n embedding. k-means can be applied and is a popular choice. But it can\n also be sensitive to initialization. Discretization is another approach\n which is less sensitive to random initialization.\n\ndegree : float, default=3\n Degree of the polynomial kernel. Ignored by other kernels.\n\ncoef0 : float, default=1\n Zero coefficient for polynomial and sigmoid kernels.\n Ignored by other kernels.\n\nkernel_params : dict of str to any, default=None\n Parameters (keyword arguments) and values for kernel passed as\n callable object. Ignored by other kernels.\n\nn_jobs : int, default=None\n The number of parallel jobs to run when `affinity='nearest_neighbors'`\n or `affinity='precomputed_nearest_neighbors'`. The neighbors search\n will be done in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool, default=False\n Verbosity mode.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\naffinity_matrix_ : array-like of shape (n_samples, n_samples)\n Affinity matrix used for clustering. Available only if after calling\n ``fit``.\n\nlabels_ : ndarray of shape (n_samples,)\n Labels of each point\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralClustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n... [4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralClustering(n_clusters=2,\n... assign_labels=\"discretize\",\n... 
random_state=0).fit(X)\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])\n>>> clustering\nSpectralClustering(assign_labels='discretize', n_clusters=2,\n random_state=0)\n\nNotes\n-----\nIf you have an affinity matrix, such as a distance matrix,\nfor which 0 means identical elements, and high values means\nvery dissimilar elements, it can be transformed in a\nsimilarity matrix that is well suited for the algorithm by\napplying the Gaussian (RBF, heat) kernel::\n\n np.exp(- dist_matrix ** 2 / (2. * delta ** 2))\n\nWhere ``delta`` is a free parameter representing the width of the Gaussian\nkernel.\n\nAnother alternative is to take a symmetric version of the k\nnearest neighbors connectivity matrix of the points.\n\nIf the pyamg package is installed, it is used: this greatly\nspeeds up computation.\n\nReferences\n----------\n\n- Normalized cuts and image segmentation, 2000\n Jianbo Shi, Jitendra Malik\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324\n\n- A Tutorial on Spectral Clustering, 2007\n Ulrike von Luxburg\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323\n\n- Multiclass spectral clustering, 2003\n Stella X. Yu, Jianbo Shi\n https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf" - } - ], - "functions": [ - { - "name": "discretize", - "decorators": [], - "parameters": [ - { - "name": "vectors", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The embedding space of the samples." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy vectors, or perform in-place normalization." 
- }, - { - "name": "max_svd_restarts", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of attempts to restart SVD if convergence fails" - }, - { - "name": "n_iter_max", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to attempt in rotation and partition matrix search if machine precision convergence is not reached" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for rotation matrix initialization. Use an int to make the randomness deterministic. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Search for a partition matrix (clustering) which is closest to the\neigenvector embedding.\n\nParameters\n----------\nvectors : array-like of shape (n_samples, n_clusters)\n The embedding space of the samples.\n\ncopy : bool, default=True\n Whether to copy vectors, or perform in-place normalization.\n\nmax_svd_restarts : int, default=30\n Maximum number of attempts to restart SVD if convergence fails\n\nn_iter_max : int, default=30\n Maximum number of iterations to attempt in rotation and partition\n matrix search if machine precision convergence is not reached\n\nrandom_state : int, RandomState instance, default=None\n Determines random number generation for rotation matrix initialization.\n Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\nReturns\n-------\nlabels : array of integers, shape: n_samples\n The labels of the clusters.\n\nReferences\n----------\n\n- Multiclass spectral clustering, 2003\n Stella X. 
Yu, Jianbo Shi\n https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf\n\nNotes\n-----\n\nThe eigenvector embedding is used to iteratively search for the\nclosest discrete partition. First, the eigenvector embedding is\nnormalized to the space of partition matrices. An optimal discrete\npartition matrix closest to this normalized embedding multiplied by\nan initial rotation is calculated. Fixing this discrete partition\nmatrix, an optimal rotation matrix is calculated. These two\ncalculations are performed until convergence. The discrete partition\nmatrix is returned as the clustering solution. Used in spectral\nclustering, this method tends to be faster and more robust to random\ninitialization than k-means." - }, - { - "name": "spectral_clustering", - "decorators": [], - "parameters": [ - { - "name": "affinity", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The affinity matrix describing the relationship of the samples to embed. **Must be symmetric**. Possible examples: - adjacency matrix of a graph, - heat kernel of the pairwise distance matrix of the samples, - symmetric k-nearest neighbours connectivity matrix of the samples." - }, - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of clusters to extract." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "n_clusters", - "limitation": null, - "ignored": false, - "docstring": "Number of eigen vectors to use for the spectral embedding" - }, - { - "name": "eigen_solver", - "type": "Optional[Literal['arpack', 'lobpcg']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The eigenvalue decomposition strategy to use. AMG requires pyamg to be installed. 
It can be faster on very large, sparse problems, but may also lead to instabilities. If None, then ``'arpack'`` is used." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A pseudo random number generator used for the initialization of the lobpcg eigen vectors decomposition when eigen_solver == 'amg' and by the K-Means initialization. Use an int to make the randomness deterministic. See :term:`Glossary `." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia." - }, - { - "name": "eigen_tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion for eigendecomposition of the Laplacian matrix when using arpack eigen_solver." - }, - { - "name": "assign_labels", - "type": "Literal['kmeans', 'discretize']", - "hasDefault": true, - "default": "'kmeans'", - "limitation": null, - "ignored": false, - "docstring": "The strategy to use to assign labels in the embedding space. There are two ways to assign labels after the laplacian embedding. k-means can be applied and is a popular choice. But it can also be sensitive to initialization. Discretization is another approach which is less sensitive to random initialization. See the 'Multiclass spectral clustering' paper referenced below for more details on the discretization approach." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbosity mode. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster. For instance, when clusters are\nnested circles on the 2D plane.\n\nIf affinity is the adjacency matrix of a graph, this method can be\nused to find normalized graph cuts.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\naffinity : {array-like, sparse matrix} of shape (n_samples, n_samples)\n The affinity matrix describing the relationship of the samples to\n embed. **Must be symmetric**.\n\n Possible examples:\n - adjacency matrix of a graph,\n - heat kernel of the pairwise distance matrix of the samples,\n - symmetric k-nearest neighbours connectivity matrix of the samples.\n\nn_clusters : int, default=None\n Number of clusters to extract.\n\nn_components : int, default=n_clusters\n Number of eigen vectors to use for the spectral embedding\n\neigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems,\n but may also lead to instabilities. If None, then ``'arpack'`` is\n used.\n\nrandom_state : int, RandomState instance, default=None\n A pseudo random number generator used for the initialization of the\n lobpcg eigen vectors decomposition when eigen_solver == 'amg' and by\n the K-Means initialization. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\nn_init : int, default=10\n Number of time the k-means algorithm will be run with different\n centroid seeds. 
The final results will be the best output of\n n_init consecutive runs in terms of inertia.\n\neigen_tol : float, default=0.0\n Stopping criterion for eigendecomposition of the Laplacian matrix\n when using arpack eigen_solver.\n\nassign_labels : {'kmeans', 'discretize'}, default='kmeans'\n The strategy to use to assign labels in the embedding\n space. There are two ways to assign labels after the laplacian\n embedding. k-means can be applied and is a popular choice. But it can\n also be sensitive to initialization. Discretization is another\n approach which is less sensitive to random initialization. See\n the 'Multiclass spectral clustering' paper referenced below for\n more details on the discretization approach.\n\nverbose : bool, default=False\n Verbosity mode.\n\n .. versionadded:: 0.24\n\nReturns\n-------\nlabels : array of integers, shape: n_samples\n The labels of the clusters.\n\nReferences\n----------\n\n- Normalized cuts and image segmentation, 2000\n Jianbo Shi, Jitendra Malik\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324\n\n- A Tutorial on Spectral Clustering, 2007\n Ulrike von Luxburg\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323\n\n- Multiclass spectral clustering, 2003\n Stella X. Yu, Jianbo Shi\n https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf\n\nNotes\n-----\nThe graph should contain only one connect component, elsewhere\nthe results make little sense.\n\nThis algorithm solves the normalized cut for k=2: it is a\nnormalized spectral clustering." 
- } - ] - }, - { - "name": "sklearn.cluster", - "imports": [ - "from _spectral import spectral_clustering", - "from _spectral import SpectralClustering", - "from _mean_shift import mean_shift", - "from _mean_shift import MeanShift", - "from _mean_shift import estimate_bandwidth", - "from _mean_shift import get_bin_seeds", - "from _affinity_propagation import affinity_propagation", - "from _affinity_propagation import AffinityPropagation", - "from _agglomerative import ward_tree", - "from _agglomerative import AgglomerativeClustering", - "from _agglomerative import linkage_tree", - "from _agglomerative import FeatureAgglomeration", - "from _kmeans import k_means", - "from _kmeans import KMeans", - "from _kmeans import MiniBatchKMeans", - "from _kmeans import kmeans_plusplus", - "from _dbscan import dbscan", - "from _dbscan import DBSCAN", - "from _optics import OPTICS", - "from _optics import cluster_optics_dbscan", - "from _optics import compute_optics_graph", - "from _optics import cluster_optics_xi", - "from _bicluster import SpectralBiclustering", - "from _bicluster import SpectralCoclustering", - "from _birch import Birch" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.cluster.tests.common", - "imports": [ - "import numpy as np" - ], - "classes": [], - "functions": [ - { - "name": "generate_clustered_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_affinity_propagation", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy.sparse import csr_matrix", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.cluster import AffinityPropagation", - "from 
sklearn.cluster._affinity_propagation import _equal_similarities_and_preferences", - "from sklearn.cluster import affinity_propagation", - "from sklearn.datasets import make_blobs", - "from sklearn.metrics import euclidean_distances" - ], - "classes": [], - "functions": [ - { - "name": "test_affinity_propagation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_predict_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_fit_non_convergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_equal_mutual_similarities", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_predict_non_convergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_non_convergence_regressiontest", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_equal_similarities_and_preferences", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_random_state", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_random_state_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_affinity_propagation_convergence_warning_dense_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Non-regression, see #13334" - }, - { - "name": "test_affinity_propagation_float32", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_pairwise_is_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_bicluster", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import issparse", - "from sklearn.model_selection import ParameterGrid", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.base import BaseEstimator", - "from sklearn.base import BiclusterMixin", - "from sklearn.cluster import SpectralCoclustering", - "from sklearn.cluster import SpectralBiclustering", - "from sklearn.cluster._bicluster import _scale_normalize", - "from sklearn.cluster._bicluster import _bistochastic_normalize", - "from sklearn.cluster._bicluster import _log_normalize", - "from sklearn.metrics import consensus_score", - "from sklearn.metrics import v_measure_score", - "from sklearn.datasets import make_biclusters", - "from sklearn.datasets import make_checkerboard" - ], - "classes": [ - { - "name": "MockBiclustering", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } 
- ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_get_submatrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_shape_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_coclustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_biclustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_do_scale_test", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that rows sum to one constant, and columns to another." - }, - { - "name": "_do_bistochastic_test", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that rows and columns sum to the same constant." 
- }, - { - "name": "test_scale_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bistochastic_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_log_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_best_piecewise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_project_and_cluster", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_perfect_checkerboard", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_jobs_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_birch", - "imports": [ - "from scipy import sparse", - "import numpy as np", - "import pytest", - "from sklearn.cluster.tests.common import generate_clustered_data", - "from sklearn.cluster import Birch", - "from sklearn.cluster import AgglomerativeClustering", - "from sklearn.datasets import make_blobs", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.linear_model import ElasticNet", - "from 
sklearn.metrics import pairwise_distances_argmin", - "from sklearn.metrics import v_measure_score", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns" - ], - "classes": [], - "functions": [ - { - "name": "test_n_samples_leaves_roots", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_birch_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_clusters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_second_call_error_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_branching_factor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_branching_factor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Use the leaf linked list for traversal" - }, - { - "name": "test_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_birch_n_clusters_long_int", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_dbscan", - "imports": [ - "import pickle", - "import numpy as np", - "import warnings", - "from scipy.spatial import distance", - "from scipy import sparse", - "import pytest", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.neighbors import NearestNeighbors", - "from sklearn.cluster import DBSCAN", - "from sklearn.cluster import dbscan", - "from sklearn.cluster.tests.common import generate_clustered_data", - "from sklearn.metrics.pairwise import pairwise_distances" - ], - "classes": [], - "functions": [ - { - "name": "test_dbscan_similarity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_sparse_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_sparse_precomputed_different_eps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_input_not_modified", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_no_core_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_metric_params", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_balltree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_badargs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_boundaries", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weighted_dbscan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_core_samples_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_precomputed_metric_with_degenerate_input_arrays", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_precomputed_metric_with_initial_rows_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_feature_agglomeration", - "imports": [ - "import numpy as np", - "from sklearn.cluster import FeatureAgglomeration", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import assert_array_almost_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_feature_agglomeration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_hierarchical", - "imports": [ - "from tempfile import mkdtemp", - "import shutil", - "import pytest", - "from functools import partial", - "import numpy as np", - "from scipy import sparse", - "from scipy.cluster import hierarchy", - "from sklearn.metrics.cluster import adjusted_rand_score", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.cluster import ward_tree", - "from sklearn.cluster import AgglomerativeClustering", - "from sklearn.cluster import FeatureAgglomeration", - "from sklearn.cluster._agglomerative import _hc_cut", - "from sklearn.cluster._agglomerative import _TREE_BUILDERS", - "from sklearn.cluster._agglomerative import linkage_tree", - "from sklearn.cluster._agglomerative import _fix_connectivity", - "from sklearn.feature_extraction.image import grid_to_graph", - "from sklearn.metrics.pairwise import PAIRED_DISTANCES", - "from sklearn.metrics.pairwise import cosine_distances", - "from sklearn.metrics.pairwise import manhattan_distances", - "from sklearn.metrics.pairwise import pairwise_distances", - "from sklearn.metrics.cluster import normalized_mutual_info_score", - "from sklearn.neighbors import kneighbors_graph", - "from sklearn.cluster._hierarchical_fast import average_merge", - "from sklearn.cluster._hierarchical_fast import max_merge", - "from sklearn.utils._fast_dict import IntFloatDict", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.datasets import make_moons", - "from sklearn.datasets import make_circles" - ], - "classes": [], - "functions": [ - { - "name": "test_linkage_misc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_structured_linkage_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unstructured_linkage_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_height_linkage_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_agglomerative_clustering_wrong_arg_memory", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_cosine_linkage_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_agglomerative_clustering_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_agglomerative_clustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ward_agglomeration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_single_linkage_clustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assess_same_labelling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Util for comparison with scipy" - }, - { - "name": "test_sparse_scikit_vs_scipy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vector_scikit_single_vs_scipy_single", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", 
- "docstring": null - }, - { - "name": "test_identical_points", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_connectivity_propagation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ward_tree_children_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ward_linkage_tree_return_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_connectivity_fixing_non_lil", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_int_float_dict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_connectivity_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_connectivity_ignores_diagonal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_full_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_agg_n_clusters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_passed_to_fix_connectivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_agglomerative_clustering_with_distance_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_small_distance_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cluster_distances_with_distance_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_agglomerative_clustering_with_distance_threshold_edge_case", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dist_threshold_invalid_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_shape_precomputed_dist_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_k_means", - "imports": [ - "import re", - "import sys", - "import numpy as np", - "from scipy import sparse as sp", - "from threadpoolctl import threadpool_limits", - "import pytest", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils.fixes import _astype_copy_false", - "from sklearn.base import clone", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.utils.extmath import row_norms", - "from sklearn.metrics import pairwise_distances", - "from sklearn.metrics import pairwise_distances_argmin", - "from sklearn.metrics.cluster import v_measure_score", - "from sklearn.cluster import KMeans", - "from sklearn.cluster import k_means", - "from sklearn.cluster import 
kmeans_plusplus", - "from sklearn.cluster import MiniBatchKMeans", - "from sklearn.cluster._kmeans import _labels_inertia", - "from sklearn.cluster._kmeans import _mini_batch_step", - "from sklearn.cluster._k_means_fast import _relocate_empty_clusters_dense", - "from sklearn.cluster._k_means_fast import _relocate_empty_clusters_sparse", - "from sklearn.cluster._k_means_fast import _euclidean_dense_dense_wrapper", - "from sklearn.cluster._k_means_fast import _euclidean_sparse_dense_wrapper", - "from sklearn.cluster._k_means_fast import _inertia_dense", - "from sklearn.cluster._k_means_fast import _inertia_sparse", - "from sklearn.datasets import make_blobs", - "from io import StringIO" - ], - "classes": [], - "functions": [ - { - "name": "test_kmeans_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_relocated_clusters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_relocate_empty_clusters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_elkan_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_convergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_update_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_fitted_model", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_all_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_minibatch_kmeans_partial_fit_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fortran_aligned_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_k_means_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_kmeans_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_kmeans_warning_init_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warning_n_init_precomputed_centers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_sensible_reassign", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_reassign", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_with_many_reassignments", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_kmeans_init_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_copyx", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_max_iter", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_dense_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_integer_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_k_means_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_float_precision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_centers_not_mutated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_init_fitted_centers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_warns_less_centers_than_unique_points", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sort_centers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weighted_vs_repeated", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unit_weights_vs_no_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaled_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_elkan_iter_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_empty_cluster_relocated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_result_of_kmeans_equal_in_diff_n_threads", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precompute_distance_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_jobs_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_kmeans_deprecated_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warning_elkan_1_cluster", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_k_means_1_iteration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_euclidean_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inertia", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_sample_weight_unchanged", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_wrong_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_kmeans_wrong_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_plusplus_wrong_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_plusplus_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_plusplus_norms", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_plusplus_dataorder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_mean_shift", - "imports": [ - "import numpy as np", - "import warnings", - "import pytest", - "from scipy import sparse", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.cluster import MeanShift", - "from sklearn.cluster import mean_shift", - "from sklearn.cluster import estimate_bandwidth", - "from sklearn.cluster import get_bin_seeds", - "from sklearn.datasets import make_blobs", - "from sklearn.metrics import v_measure_score" - ], - 
"classes": [], - "functions": [ - { - "name": "test_estimate_bandwidth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimate_bandwidth_1sample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_shift", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_shift_negative_bandwidth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimate_bandwidth_with_sparse_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_meanshift_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_meanshift_all_orphans", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unfitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cluster_intensity_tie", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bin_seeds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_shift_zero_bandwidth", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_optics", - "imports": [ - "import platform", - "import sys", - "import numpy as np", - "import pytest", - "from sklearn.datasets import make_blobs", - "from sklearn.cluster import OPTICS", - "from sklearn.cluster._optics import _extend_region", - "from sklearn.cluster._optics import _extract_xi_labels", - "from sklearn.metrics.cluster import contingency_matrix", - "from sklearn.metrics.pairwise import pairwise_distances", - "from sklearn.cluster import DBSCAN", - "from sklearn.utils import shuffle", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils.fixes import sp_version", - "from sklearn.utils.fixes import parse_version", - "from sklearn.cluster.tests.common import generate_clustered_data", - "from sklearn.utils import _IS_32BIT" - ], - "classes": [], - "functions": [ - { - "name": "test_extend_downward", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extend_upward", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_the_extract_xi_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_xi", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cluster_hierarchy_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_correct_number_of_clusters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_minimum_number_of_sample_check", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bad_extract", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bad_reachability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_close_extract", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_optics_parity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_samples_edge_case", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_cluster_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_cluster_size_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_cluster_size_invalid2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_processing_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compare_to_ELKI", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_cluster_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_dbscan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_precomputed_dists", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_spectral", - "imports": [ - "import re", - "import numpy as np", - "from scipy import sparse", - "import pytest", - "import pickle", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.cluster import SpectralClustering", - "from sklearn.cluster import spectral_clustering", - "from sklearn.cluster._spectral import discretize", - "from sklearn.feature_extraction import img_to_graph", - "from sklearn.metrics import pairwise_distances", - "from sklearn.metrics import adjusted_rand_score", - "from sklearn.metrics.pairwise import kernel_metrics", - "from sklearn.metrics.pairwise import rbf_kernel", - "from sklearn.neighbors import NearestNeighbors", - "from sklearn.datasets import make_blobs", - "from pyamg import smoothed_aggregation_solver" - ], - "classes": [], - "functions": [ - { - "name": "test_spectral_clustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_unknown_mode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_unknown_assign_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_clustering_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precomputed_nearest_neighbors_filtering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinities", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_clustering_with_arpack_amg_solvers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_is_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.compose._column_transformer", - "imports": [ - "from itertools import chain", - "import numpy as np", - "from scipy import sparse", - "from joblib import Parallel", - "from base import clone", - "from base import TransformerMixin", - "from utils._estimator_html_repr import _VisualBlock", - "from pipeline import _fit_transform_one", - "from pipeline import _transform_one", - "from pipeline import _name_estimators", - "from preprocessing import FunctionTransformer", - "from utils import Bunch", - "from utils import _safe_indexing", - "from utils import _get_column_indices", - "from utils import _determine_key_type", - "from utils.metaestimators import _BaseComposition", - "from utils.validation import check_array", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed" - ], - "classes": [ - { - "name": "ColumnTransformer", - "decorators": [], - "superclasses": [], - "methods": [ 
- { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "transformers", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of (name, transformer, columns) tuples specifying the transformer objects to be applied to subsets of the data. name : str Like in Pipeline and FeatureUnion, this allows the transformer and its parameters to be set using ``set_params`` and searched in grid search. transformer : {'drop', 'passthrough'} or estimator Estimator must support :term:`fit` and :term:`transform`. Special-cased strings 'drop' and 'passthrough' are accepted as well, to indicate to drop the columns or to pass them through untransformed, respectively. columns : str, array-like of str, int, array-like of int, array-like of bool, slice or callable Indexes the data on its second axis. Integers are interpreted as positional columns, while strings can reference DataFrame columns by name. A scalar string or int should be used where ``transformer`` expects X to be a 1d array-like (vector), otherwise a 2d array will be passed to the transformer. A callable is passed the input data `X` and can return any of the above. To select multiple columns by name or dtype, you can use :obj:`make_column_selector`." - }, - { - "name": "remainder", - "type": "Literal['drop', 'passthrough']", - "hasDefault": true, - "default": "'drop'", - "limitation": null, - "ignored": false, - "docstring": "By default, only the specified columns in `transformers` are transformed and combined in the output, and the non-specified columns are dropped. (default of ``'drop'``). By specifying ``remainder='passthrough'``, all remaining columns that were not specified in `transformers` will be automatically passed through. This subset of columns is concatenated with the output of the transformers. By setting ``remainder`` to be an estimator, the remaining non-specified columns will use the ``remainder`` estimator. 
The estimator must support :term:`fit` and :term:`transform`. Note that using this feature requires that the DataFrame columns input at :term:`fit` and :term:`transform` have identical order." - }, - { - "name": "sparse_threshold", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "If the output of the different transformers contains sparse matrices, these will be stacked as a sparse matrix if the overall density is lower than this value. Use ``sparse_threshold=0`` to always return dense. When the transformed output consists of all dense data, the stacked result will be dense, and this keyword will be ignored." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "transformer_weights", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multiplicative weights for features per transformer. The output of the transformer is multiplied by these weights. Keys are transformer names, values the weights." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the time elapsed while fitting each transformer will be printed as it is completed." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_transformers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get parameters for this estimator.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `transformers` of the\n`ColumnTransformer`.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values." - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. Note that you\ncan directly set the parameters of the estimators contained in\n`transformers` of `ColumnTransformer`.\n\nReturns\n-------\nself" - }, - { - "name": "_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate (name, trans, column, weight) tuples.\n\nIf fitted=True, use the fitted transformers, else use the\nuser specified transformers updated with converted column names\nand potentially appended with transformer for remainder." 
- }, - { - "name": "_validate_transformers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_column_callables", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Converts callable column specifications." - }, - { - "name": "_validate_remainder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validates ``remainder`` and defines ``_remainder`` targeting\nthe remaining columns." - }, - { - "name": "named_transformers_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Access the fitted transformer by name.\n\nRead-only attribute to access any transformer by given name.\nKeys are transformer names and values are the fitted transformer\nobjects." - }, - { - "name": "get_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get feature names from all transformers.\n\nReturns\n-------\nfeature_names : list of strings\n Names of the features produced by transform." - }, - { - "name": "_update_fitted_transformers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ensure that the output of each transformer is 2D. Otherwise\nhstack can raise an error or produce incorrect results." 
- }, - { - "name": "_log_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function to fit and/or transform on demand.\n\nReturn value (transformers and/or transformed X data) depends\non the passed function.\n``fitted=True`` ensures the fitted transformers are used." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, of which specified subsets are used to fit the transformers." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets for supervised learning." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit all transformers using X.\n\nParameters\n----------\nX : {array-like, dataframe} of shape (n_samples, n_features)\n Input data, of which specified subsets are used to fit the\n transformers.\n\ny : array-like of shape (n_samples,...), default=None\n Targets for supervised learning.\n\nReturns\n-------\nself : ColumnTransformer\n This estimator" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, of which specified subsets are used to fit the transformers." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets for supervised learning." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit all transformers, transform the data and concatenate results.\n\nParameters\n----------\nX : {array-like, dataframe} of shape (n_samples, n_features)\n Input data, of which specified subsets are used to fit the\n transformers.\n\ny : array-like of shape (n_samples,), default=None\n Targets for supervised learning.\n\nReturns\n-------\nX_t : {array-like, sparse matrix} of shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers. If\n any result is a sparse matrix, everything will be converted to\n sparse matrices." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to be transformed by subset." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X separately by each transformer, concatenate results.\n\nParameters\n----------\nX : {array-like, dataframe} of shape (n_samples, n_features)\n The data to be transformed by subset.\n\nReturns\n-------\nX_t : {array-like, sparse matrix} of shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers. If\n any result is a sparse matrix, everything will be converted to\n sparse matrices." 
- }, - { - "name": "_hstack", - "decorators": [], - "parameters": [ - { - "name": "Xs", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Stacks Xs horizontally.\n\nThis allows subclasses to control the stacking behavior, while reusing\neverything else from ColumnTransformer.\n\nParameters\n----------\nXs : list of {array-like, sparse matrix, dataframe}" - }, - { - "name": "_sk_visual_block_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Applies transformers to columns of an array or pandas DataFrame.\n\nThis estimator allows different columns or column subsets of the input\nto be transformed separately and the features generated by each transformer\nwill be concatenated to form a single feature space.\nThis is useful for heterogeneous or columnar data, to combine several\nfeature extraction mechanisms or transformations into a single transformer.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\ntransformers : list of tuples\n List of (name, transformer, columns) tuples specifying the\n transformer objects to be applied to subsets of the data.\n\n name : str\n Like in Pipeline and FeatureUnion, this allows the transformer and\n its parameters to be set using ``set_params`` and searched in grid\n search.\n transformer : {'drop', 'passthrough'} or estimator\n Estimator must support :term:`fit` and :term:`transform`.\n Special-cased strings 'drop' and 'passthrough' are accepted as\n well, to indicate to drop the columns or to pass them through\n untransformed, respectively.\n columns : str, array-like of str, int, array-like of int, array-like of bool, slice or callable\n Indexes the data on its second axis. 
Integers are interpreted as\n positional columns, while strings can reference DataFrame columns\n by name. A scalar string or int should be used where\n ``transformer`` expects X to be a 1d array-like (vector),\n otherwise a 2d array will be passed to the transformer.\n A callable is passed the input data `X` and can return any of the\n above. To select multiple columns by name or dtype, you can use\n :obj:`make_column_selector`.\n\nremainder : {'drop', 'passthrough'} or estimator, default='drop'\n By default, only the specified columns in `transformers` are\n transformed and combined in the output, and the non-specified\n columns are dropped. (default of ``'drop'``).\n By specifying ``remainder='passthrough'``, all remaining columns that\n were not specified in `transformers` will be automatically passed\n through. This subset of columns is concatenated with the output of\n the transformers.\n By setting ``remainder`` to be an estimator, the remaining\n non-specified columns will use the ``remainder`` estimator. The\n estimator must support :term:`fit` and :term:`transform`.\n Note that using this feature requires that the DataFrame columns\n input at :term:`fit` and :term:`transform` have identical order.\n\nsparse_threshold : float, default=0.3\n If the output of the different transformers contains sparse matrices,\n these will be stacked as a sparse matrix if the overall density is\n lower than this value. Use ``sparse_threshold=0`` to always return\n dense. When the transformed output consists of all dense data, the\n stacked result will be dense, and this keyword will be ignored.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\ntransformer_weights : dict, default=None\n Multiplicative weights for features per transformer. The output of the\n transformer is multiplied by these weights. 
Keys are transformer names,\n values the weights.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\nAttributes\n----------\ntransformers_ : list\n The collection of fitted transformers as tuples of\n (name, fitted_transformer, column). `fitted_transformer` can be an\n estimator, 'drop', or 'passthrough'. In case there were no columns\n selected, this will be the unfitted transformer.\n If there are remaining columns, the final element is a tuple of the\n form:\n ('remainder', transformer, remaining_columns) corresponding to the\n ``remainder`` parameter. If there are remaining columns, then\n ``len(transformers_)==len(transformers)+1``, otherwise\n ``len(transformers_)==len(transformers)``.\n\nnamed_transformers_ : :class:`~sklearn.utils.Bunch`\n Read-only attribute to access any transformer by given name.\n Keys are transformer names and values are the fitted transformer\n objects.\n\nsparse_output_ : bool\n Boolean flag indicating whether the output of ``transform`` is a\n sparse matrix or a dense numpy array, which depends on the output\n of the individual transformers and the `sparse_threshold` keyword.\n\nNotes\n-----\nThe order of the columns in the transformed feature matrix follows the\norder of how the columns are specified in the `transformers` list.\nColumns of the original feature matrix that are not specified are\ndropped from the resulting transformed feature matrix, unless specified\nin the `passthrough` keyword. 
Those columns specified with `passthrough`\nare added at the right to the output of the transformers.\n\nSee Also\n--------\nmake_column_transformer : Convenience function for\n combining the outputs of multiple transformer objects applied to\n column subsets of the original feature space.\nmake_column_selector : Convenience function for selecting\n columns based on datatype or the columns name with a regex pattern.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.compose import ColumnTransformer\n>>> from sklearn.preprocessing import Normalizer\n>>> ct = ColumnTransformer(\n... [(\"norm1\", Normalizer(norm='l1'), [0, 1]),\n... (\"norm2\", Normalizer(norm='l1'), slice(2, 4))])\n>>> X = np.array([[0., 1., 2., 2.],\n... [1., 1., 0., 1.]])\n>>> # Normalizer scales each row of X to unit norm. A separate scaling\n>>> # is applied for the two first and two last elements of each\n>>> # row independently.\n>>> ct.fit_transform(X)\narray([[0. , 1. , 0.5, 0.5],\n [0.5, 0.5, 0. , 1. ]])" - }, - { - "name": "make_column_selector", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "pattern", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of columns containing this regex pattern will be included. If None, column selection will not be selected based on pattern." - }, - { - "name": "dtype_include", - "type": "List", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A selection of dtypes to include. For more details, see :meth:`pandas.DataFrame.select_dtypes`." - }, - { - "name": "dtype_exclude", - "type": "List", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A selection of dtypes to exclude. For more details, see :meth:`pandas.DataFrame.select_dtypes`." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Create a callable to select columns to be used with\n:class:`ColumnTransformer`.\n\n:func:`make_column_selector` can select columns based on datatype or the\ncolumns name with a regex. When using multiple selection criteria, **all**\ncriteria must match for a column to be selected.\n\nParameters\n----------\npattern : str, default=None\n Name of columns containing this regex pattern will be included. If\n None, column selection will not be selected based on pattern.\n\ndtype_include : column dtype or list of column dtypes, default=None\n A selection of dtypes to include. For more details, see\n :meth:`pandas.DataFrame.select_dtypes`.\n\ndtype_exclude : column dtype or list of column dtypes, default=None\n A selection of dtypes to exclude. For more details, see\n :meth:`pandas.DataFrame.select_dtypes`.\n\nReturns\n-------\nselector : callable\n Callable for column selection to be used by a\n :class:`ColumnTransformer`.\n\nSee Also\n--------\nColumnTransformer : Class that allows combining the\n outputs of multiple transformer objects used on column subsets\n of the data into a single feature space.\n\nExamples\n--------\n>>> from sklearn.preprocessing import StandardScaler, OneHotEncoder\n>>> from sklearn.compose import make_column_transformer\n>>> from sklearn.compose import make_column_selector\n>>> import pandas as pd # doctest: +SKIP\n>>> X = pd.DataFrame({'city': ['London', 'London', 'Paris', 'Sallisaw'],\n... 'rating': [5, 3, 4, 5]}) # doctest: +SKIP\n>>> ct = make_column_transformer(\n... (StandardScaler(),\n... make_column_selector(dtype_include=np.number)), # rating\n... (OneHotEncoder(),\n... make_column_selector(dtype_include=object))) # city\n>>> ct.fit_transform(X) # doctest: +SKIP\narray([[ 0.90453403, 1. , 0. 
, 0. ],\n [-1.50755672, 1. , 0. , 0. ],\n [-0.30151134, 0. , 1. , 0. ],\n [ 0.90453403, 0. , 0. , 1. ]])" - } - ], - "functions": [ - { - "name": "_check_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Use check_array only on lists and other non-array-likes / sparse" - }, - { - "name": "_is_empty_column_selection", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return True if the column selection is empty (empty list or all-False\nboolean array)." - }, - { - "name": "_get_transformer_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Construct (name, trans, column) tuples from list" - }, - { - "name": "make_column_transformer", - "decorators": [], - "parameters": [ - { - "name": "*transformers", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Tuples of the form (transformer, columns) specifying the transformer objects to be applied to subsets of the data. transformer : {'drop', 'passthrough'} or estimator Estimator must support :term:`fit` and :term:`transform`. Special-cased strings 'drop' and 'passthrough' are accepted as well, to indicate to drop the columns or to pass them through untransformed, respectively. columns : str, array-like of str, int, array-like of int, slice, array-like of bool or callable Indexes the data on its second axis. Integers are interpreted as positional columns, while strings can reference DataFrame columns by name. A scalar string or int should be used where ``transformer`` expects X to be a 1d array-like (vector), otherwise a 2d array will be passed to the transformer. A callable is passed the input data `X` and can return any of the above. To select multiple columns by name or dtype, you can use :obj:`make_column_selector`." 
- }, - { - "name": "remainder", - "type": "Literal['drop', 'passthrough']", - "hasDefault": true, - "default": "'drop'", - "limitation": null, - "ignored": false, - "docstring": "By default, only the specified columns in `transformers` are transformed and combined in the output, and the non-specified columns are dropped. (default of ``'drop'``). By specifying ``remainder='passthrough'``, all remaining columns that were not specified in `transformers` will be automatically passed through. This subset of columns is concatenated with the output of the transformers. By setting ``remainder`` to be an estimator, the remaining non-specified columns will use the ``remainder`` estimator. The estimator must support :term:`fit` and :term:`transform`." - }, - { - "name": "sparse_threshold", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "If the transformed output consists of a mix of sparse and dense data, it will be stacked as a sparse matrix if the density is lower than this value. Use ``sparse_threshold=0`` to always return dense. When the transformed output consists of all sparse or all dense data, the stacked result will be sparse or dense, respectively, and this keyword will be ignored." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the time elapsed while fitting each transformer will be printed as it is completed." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Construct a ColumnTransformer from the given transformers.\n\nThis is a shorthand for the ColumnTransformer constructor; it does not\nrequire, and does not permit, naming the transformers. Instead, they will\nbe given names automatically based on their types. It also does not allow\nweighting with ``transformer_weights``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n*transformers : tuples\n Tuples of the form (transformer, columns) specifying the\n transformer objects to be applied to subsets of the data.\n\n transformer : {'drop', 'passthrough'} or estimator\n Estimator must support :term:`fit` and :term:`transform`.\n Special-cased strings 'drop' and 'passthrough' are accepted as\n well, to indicate to drop the columns or to pass them through\n untransformed, respectively.\n columns : str, array-like of str, int, array-like of int, slice, array-like of bool or callable\n Indexes the data on its second axis. Integers are interpreted as\n positional columns, while strings can reference DataFrame columns\n by name. A scalar string or int should be used where\n ``transformer`` expects X to be a 1d array-like (vector),\n otherwise a 2d array will be passed to the transformer.\n A callable is passed the input data `X` and can return any of the\n above. To select multiple columns by name or dtype, you can use\n :obj:`make_column_selector`.\n\nremainder : {'drop', 'passthrough'} or estimator, default='drop'\n By default, only the specified columns in `transformers` are\n transformed and combined in the output, and the non-specified\n columns are dropped. (default of ``'drop'``).\n By specifying ``remainder='passthrough'``, all remaining columns that\n were not specified in `transformers` will be automatically passed\n through. 
This subset of columns is concatenated with the output of\n the transformers.\n By setting ``remainder`` to be an estimator, the remaining\n non-specified columns will use the ``remainder`` estimator. The\n estimator must support :term:`fit` and :term:`transform`.\n\nsparse_threshold : float, default=0.3\n If the transformed output consists of a mix of sparse and dense data,\n it will be stacked as a sparse matrix if the density is lower than this\n value. Use ``sparse_threshold=0`` to always return dense.\n When the transformed output consists of all sparse or all dense data,\n the stacked result will be sparse or dense, respectively, and this\n keyword will be ignored.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\nReturns\n-------\nct : ColumnTransformer\n\nSee Also\n--------\nColumnTransformer : Class that allows combining the\n outputs of multiple transformer objects used on column subsets\n of the data into a single feature space.\n\nExamples\n--------\n>>> from sklearn.preprocessing import StandardScaler, OneHotEncoder\n>>> from sklearn.compose import make_column_transformer\n>>> make_column_transformer(\n... (StandardScaler(), ['numerical_column']),\n... 
(OneHotEncoder(), ['categorical_column']))\nColumnTransformer(transformers=[('standardscaler', StandardScaler(...),\n ['numerical_column']),\n ('onehotencoder', OneHotEncoder(...),\n ['categorical_column'])])" - } - ] - }, - { - "name": "sklearn.compose._target", - "imports": [ - "import warnings", - "import numpy as np", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from base import clone", - "from utils.validation import check_is_fitted", - "from utils import check_array", - "from utils import _safe_indexing", - "from preprocessing import FunctionTransformer", - "from utils.validation import _deprecate_positional_args", - "from exceptions import NotFittedError", - "from linear_model import LinearRegression" - ], - "classes": [ - { - "name": "TransformedTargetRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "regressor", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regressor object such as derived from ``RegressorMixin``. This regressor will automatically be cloned each time prior to fitting. If regressor is ``None``, ``LinearRegression()`` is created and used." - }, - { - "name": "transformer", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator object such as derived from ``TransformerMixin``. Cannot be set at the same time as ``func`` and ``inverse_func``. If ``transformer`` is ``None`` as well as ``func`` and ``inverse_func``, the transformer will be an identity transformer. Note that the transformer will be cloned during fitting. Also, the transformer is restricting ``y`` to be a numpy array." - }, - { - "name": "func", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Function to apply to ``y`` before passing to ``fit``. 
Cannot be set at the same time as ``transformer``. The function needs to return a 2-dimensional array. If ``func`` is ``None``, the function used will be the identity function." - }, - { - "name": "inverse_func", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Function to apply to the prediction of the regressor. Cannot be set at the same time as ``transformer`` as well. The function needs to return a 2-dimensional array. The inverse function is used to return predictions to the same space of the original training labels." - }, - { - "name": "check_inverse", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to check that ``transform`` followed by ``inverse_transform`` or ``func`` followed by ``inverse_func`` leads to the original targets." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check transformer and fit transformer.\n\nCreate the default transformer, fit it and make additional inverse\ncheck on a subset (optional)." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``fit`` method of the underlying regressor." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\n**fit_params : dict\n Parameters passed to the ``fit`` method of the underlying\n regressor.\n\n\nReturns\n-------\nself : object" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the base regressor, applying inverse.\n\nThe regressor is used to predict and the ``inverse_func`` or\n``inverse_transform`` is applied before returning the prediction.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\ny_hat : ndarray of shape (n_samples,)\n Predicted values." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Meta-estimator to regress on a transformed target.\n\nUseful for applying a non-linear transformation to the target ``y`` in\nregression problems. 
This transformation can be given as a Transformer\nsuch as the QuantileTransformer or as a function and its inverse such as\n``log`` and ``exp``.\n\nThe computation during ``fit`` is::\n\n regressor.fit(X, func(y))\n\nor::\n\n regressor.fit(X, transformer.transform(y))\n\nThe computation during ``predict`` is::\n\n inverse_func(regressor.predict(X))\n\nor::\n\n transformer.inverse_transform(regressor.predict(X))\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nregressor : object, default=None\n Regressor object such as derived from ``RegressorMixin``. This\n regressor will automatically be cloned each time prior to fitting.\n If regressor is ``None``, ``LinearRegression()`` is created and used.\n\ntransformer : object, default=None\n Estimator object such as derived from ``TransformerMixin``. Cannot be\n set at the same time as ``func`` and ``inverse_func``. If\n ``transformer`` is ``None`` as well as ``func`` and ``inverse_func``,\n the transformer will be an identity transformer. Note that the\n transformer will be cloned during fitting. Also, the transformer is\n restricting ``y`` to be a numpy array.\n\nfunc : function, default=None\n Function to apply to ``y`` before passing to ``fit``. Cannot be set at\n the same time as ``transformer``. The function needs to return a\n 2-dimensional array. If ``func`` is ``None``, the function used will be\n the identity function.\n\ninverse_func : function, default=None\n Function to apply to the prediction of the regressor. Cannot be set at\n the same time as ``transformer`` as well. The function needs to return\n a 2-dimensional array. 
The inverse function is used to return\n predictions to the same space of the original training labels.\n\ncheck_inverse : bool, default=True\n Whether to check that ``transform`` followed by ``inverse_transform``\n or ``func`` followed by ``inverse_func`` leads to the original targets.\n\nAttributes\n----------\nregressor_ : object\n Fitted regressor.\n\ntransformer_ : object\n Transformer used in ``fit`` and ``predict``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LinearRegression\n>>> from sklearn.compose import TransformedTargetRegressor\n>>> tt = TransformedTargetRegressor(regressor=LinearRegression(),\n... func=np.log, inverse_func=np.exp)\n>>> X = np.arange(4).reshape(-1, 1)\n>>> y = np.exp(2 * X).ravel()\n>>> tt.fit(X, y)\nTransformedTargetRegressor(...)\n>>> tt.score(X, y)\n1.0\n>>> tt.regressor_.coef_\narray([2.])\n\nNotes\n-----\nInternally, the target ``y`` is always converted into a 2-dimensional array\nto be used by scikit-learn transformers. At the time of prediction, the\noutput will be reshaped to a have the same number of dimensions as ``y``.\n\nSee :ref:`examples/compose/plot_transformed_target.py\n`." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.compose", - "imports": [ - "from _column_transformer import ColumnTransformer", - "from _column_transformer import make_column_transformer", - "from _column_transformer import make_column_selector", - "from _target import TransformedTargetRegressor" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.compose.tests.test_column_transformer", - "imports": [ - "import re", - "import pickle", - "import warnings", - "import numpy as np", - "from scipy import sparse", - "import pytest", - "from numpy.testing import assert_allclose", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.base import BaseEstimator", - "from sklearn.compose import ColumnTransformer", - "from sklearn.compose import make_column_transformer", - "from sklearn.compose import make_column_selector", - "from sklearn.exceptions import NotFittedError", - "from sklearn.preprocessing import FunctionTransformer", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.preprocessing import Normalizer", - "from sklearn.preprocessing import OneHotEncoder", - "from sklearn.feature_extraction import DictVectorizer" - ], - "classes": [ - { - "name": "Trans", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "DoubleTrans", - "decorators": [], - "superclasses": [], - 
"methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "SparseMatrixTrans", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "TransNo2D", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "TransRaise", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_column_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_empty_columns", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_sparse_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_sparse_stacking", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_mixed_cols_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_sparse_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_error_msg_1D", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_2D_transformer_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_2D_transformer_output_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_invalid_columns", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_invalid_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_column_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_column_transformer_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_column_transformer_kwargs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_column_transformer_remainder_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_get_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_named_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_cloning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_get_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_get_feature_names_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_special_strings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_column_transformer_remainder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_remainder_numpy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_remainder_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_remainder_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_no_remaining_remainder_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_drops_all_remainder_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_sparse_remainder_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_drop_all_sparse_remainder_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_get_set_params_with_remainder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_no_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_column_transformer_no_estimators_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_callable_specifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_callable_specifier_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_negative_column_indexes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_reordered_column_names_remainder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Regression test for issue #14223: 'Named col indexing fails with\nColumnTransformer remainder on changing DataFrame column ordering'\n\nShould raise error on changed order combined with remainder.\nShould allow for added columns in `transform` input DataFrame\nas long as all preceding columns match." - }, - { - "name": "test_feature_name_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Tests if the proper warning/error is raised if the columns do not match\nduring fit and transform." 
- }, - { - "name": "test_column_transformer_mask_indexing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_column_selector_with_select_dtypes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_with_make_column_selector", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_column_selector_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_column_selector_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_names_empty_columns", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sk_visual_block_remainder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sk_visual_block_remainder_drop", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sk_visual_block_remainder_fitted_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sk_visual_block_remainder_fitted_numpy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.compose.tests.test_target", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.base import 
clone", - "from sklearn.base import BaseEstimator", - "from sklearn.base import TransformerMixin", - "from sklearn.dummy import DummyRegressor", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.preprocessing import FunctionTransformer", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.pipeline import Pipeline", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import OrthogonalMatchingPursuit", - "from sklearn import datasets", - "from sklearn.compose import TransformedTargetRegressor" - ], - "classes": [ - { - "name": "DummyCheckerArrayTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "DummyCheckerListRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "DummyTransformer", - "decorators": [], 
- "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy transformer which count how many time fit was called." - }, - { - "name": "DummyRegressorWithExtraFitParams", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_transform_target_regressor_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_invertible", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_standard_scaled", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_shifted_by_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_functions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_transform_target_regressor_functions_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_1d_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_2d_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_2d_transformer_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_multi_to_single", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_ensure_y_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_count_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_pass_fit_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_route_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.compose.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.covariance._elliptic_envelope", - "imports": [ - "import numpy as np", - "from None import MinCovDet", - "from utils.validation import check_is_fitted", - "from utils.validation import check_array", - "from utils.validation import _deprecate_positional_args", - 
"from metrics import accuracy_score", - "from base import OutlierMixin" - ], - "classes": [ - { - "name": "EllipticEnvelope", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "store_precision", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specify if the estimated precision is stored." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the support of robust location and covariance estimates is computed, and a covariance estimate is recomputed from it, without centering the data. Useful to work with data whose mean is significantly equal to zero but is not exactly zero. If False, the robust location and covariance are directly computed with the FastMCD algorithm without additional treatment." - }, - { - "name": "support_fraction", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The proportion of points to be included in the support of the raw MCD estimate. If None, the minimum value of support_fraction will be used within the algorithm: `[n_sample + n_features + 1] / 2`. Range is (0, 1)." - }, - { - "name": "contamination", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The amount of contamination of the data set, i.e. the proportion of outliers in the data set. Range is (0, 0.5)." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the pseudo random number generator for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the EllipticEnvelope model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : Ignored\n Not used, present for API consistency by convention." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the decision function of the given observations.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\ndecision : ndarray of shape (n_samples,)\n Decision function of the samples.\n It is equal to the shifted Mahalanobis distances.\n The threshold for being an outlier is 0, which ensures a\n compatibility with other outlier detection algorithms." - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the negative Mahalanobis distances.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\nnegative_mahal_distances : array-like of shape (n_samples,)\n Opposite of the Mahalanobis distances." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the labels (1 inlier, -1 outlier) of X according to the\nfitted model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and +1 for inliers." - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels for X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the mean accuracy on the given test data and labels.\n\nIn multi-label classification, this is the subset accuracy\nwhich is a harsh metric since you require for each sample that\neach label set be correctly predicted.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Mean accuracy of self.predict(X) w.r.t. y." - } - ], - "docstring": "An object for detecting outliers in a Gaussian distributed dataset.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, the support of robust location and covariance estimates\n is computed, and a covariance estimate is recomputed from it,\n without centering the data.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, the robust location and covariance are directly computed\n with the FastMCD algorithm without additional treatment.\n\nsupport_fraction : float, default=None\n The proportion of points to be included in the support of the raw\n MCD estimate. If None, the minimum value of support_fraction will\n be used within the algorithm: `[n_sample + n_features + 1] / 2`.\n Range is (0, 1).\n\ncontamination : float, default=0.1\n The amount of contamination of the data set, i.e. the proportion\n of outliers in the data set. Range is (0, 0.5).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling\n the data. Pass an int for reproducible results across multiple function\n calls. 
See :term: `Glossary `.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated robust location.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated robust covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nsupport_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute the\n robust estimates of location and shape.\n\noffset_ : float\n Offset used to define the decision function from the raw scores.\n We have the relation: ``decision_function = score_samples - offset_``.\n The offset depends on the contamination parameter and is defined in\n such a way we obtain the expected number of outliers (samples with\n decision function < 0) in training.\n\n .. versionadded:: 0.20\n\nraw_location_ : ndarray of shape (n_features,)\n The raw robust estimated location before correction and re-weighting.\n\nraw_covariance_ : ndarray of shape (n_features, n_features)\n The raw robust estimated covariance before correction and re-weighting.\n\nraw_support_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute\n the raw robust estimates of location and shape, before correction\n and re-weighting.\n\ndist_ : ndarray of shape (n_samples,)\n Mahalanobis distances of the training set (on which :meth:`fit` is\n called) observations.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import EllipticEnvelope\n>>> true_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> X = np.random.RandomState(0).multivariate_normal(mean=[0, 0],\n... cov=true_cov,\n... size=500)\n>>> cov = EllipticEnvelope(random_state=0).fit(X)\n>>> # predict returns 1 for an inlier and -1 for an outlier\n>>> cov.predict([[0, 0],\n... [3, 3]])\narray([ 1, -1])\n>>> cov.covariance_\narray([[0.7411..., 0.2535...],\n [0.2535..., 0.3053...]])\n>>> cov.location_\narray([0.0813... 
, 0.0427...])\n\nSee Also\n--------\nEmpiricalCovariance, MinCovDet\n\nNotes\n-----\nOutlier detection from covariance estimation may break or not\nperform well in high-dimensional settings. In particular, one will\nalways take care to work with ``n_samples > n_features ** 2``.\n\nReferences\n----------\n.. [1] Rousseeuw, P.J., Van Driessen, K. \"A fast algorithm for the\n minimum covariance determinant estimator\" Technometrics 41(3), 212\n (1999)" - } - ], - "functions": [] - }, - { - "name": "sklearn.covariance._empirical_covariance", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy import linalg", - "from base import BaseEstimator", - "from utils import check_array", - "from utils.extmath import fast_logdet", - "from metrics.pairwise import pairwise_distances", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "EmpiricalCovariance", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "store_precision", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specifies if the estimated precision is stored." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data are not centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False (default), data are centered before computation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_covariance", - "decorators": [], - "parameters": [ - { - "name": "covariance", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated covariance matrix to be stored, and from which precision is computed." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Saves the covariance and precision estimates\n\nStorage is done accordingly to `self.store_precision`.\nPrecision stored only if invertible.\n\nParameters\n----------\ncovariance : array-like of shape (n_features, n_features)\n Estimated covariance matrix to be stored, and from which precision\n is computed." - }, - { - "name": "get_precision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Getter for the precision matrix.\n\nReturns\n-------\nprecision_ : array-like of shape (n_features, n_features)\n The precision matrix associated to the current covariance object." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fits the Maximum Likelihood Estimator covariance model\naccording to the given training data and parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object" - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X_test", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data of which we compute the likelihood, where n_samples is the number of samples and n_features is the number of features. X_test is assumed to be drawn from the same distribution than the data used in fit (including centering)." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the log-likelihood of a Gaussian data set with\n`self.covariance_` as an estimator of its covariance matrix.\n\nParameters\n----------\nX_test : array-like of shape (n_samples, n_features)\n Test data of which we compute the likelihood, where n_samples is\n the number of samples and n_features is the number of features.\n X_test is assumed to be drawn from the same distribution than\n the data used in fit (including centering).\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nres : float\n The likelihood of the data set with `self.covariance_` as an\n estimator of its covariance matrix." 
- }, - { - "name": "error_norm", - "decorators": [], - "parameters": [ - { - "name": "comp_cov", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The covariance to compare with." - }, - { - "name": "norm", - "type": "Literal[\"frobenius\", \"spectral\"]", - "hasDefault": true, - "default": "\"frobenius\"", - "limitation": null, - "ignored": false, - "docstring": "The type of norm used to compute the error. Available error types: - 'frobenius' (default): sqrt(tr(A^t.A)) - 'spectral': sqrt(max(eigenvalues(A^t.A)) where A is the error ``(comp_cov - self.covariance_)``." - }, - { - "name": "scaling", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True (default), the squared error norm is divided by n_features. If False, the squared error norm is not rescaled." - }, - { - "name": "squared", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to compute the squared error norm or the error norm. If True (default), the squared error norm is returned. If False, the error norm is returned." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the Mean Squared Error between two covariance estimators.\n(In the sense of the Frobenius norm).\n\nParameters\n----------\ncomp_cov : array-like of shape (n_features, n_features)\n The covariance to compare with.\n\nnorm : {\"frobenius\", \"spectral\"}, default=\"frobenius\"\n The type of norm used to compute the error. 
Available error types:\n - 'frobenius' (default): sqrt(tr(A^t.A))\n - 'spectral': sqrt(max(eigenvalues(A^t.A))\n where A is the error ``(comp_cov - self.covariance_)``.\n\nscaling : bool, default=True\n If True (default), the squared error norm is divided by n_features.\n If False, the squared error norm is not rescaled.\n\nsquared : bool, default=True\n Whether to compute the squared error norm or the error norm.\n If True (default), the squared error norm is returned.\n If False, the error norm is returned.\n\nReturns\n-------\nresult : float\n The Mean Squared Error (in the sense of the Frobenius norm) between\n `self` and `comp_cov` covariance estimators." - }, - { - "name": "mahalanobis", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The observations, the Mahalanobis distances of the which we compute. Observations are assumed to be drawn from the same distribution than the data used in fit." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the squared Mahalanobis distances of given observations.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The observations, the Mahalanobis distances of the which we\n compute. Observations are assumed to be drawn from the same\n distribution than the data used in fit.\n\nReturns\n-------\ndist : ndarray of shape (n_samples,)\n Squared Mahalanobis distances of the observations." 
- } - ], - "docstring": "Maximum likelihood covariance estimator\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specifies if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, data are not centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False (default), data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo-inverse matrix.\n (stored only if store_precision is True)\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import EmpiricalCovariance\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... size=500)\n>>> cov = EmpiricalCovariance().fit(X)\n>>> cov.covariance_\narray([[0.7569..., 0.2818...],\n [0.2818..., 0.3928...]])\n>>> cov.location_\narray([0.0622..., 0.0193...])" - } - ], - "functions": [ - { - "name": "log_likelihood", - "decorators": [], - "parameters": [ - { - "name": "emp_cov", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum Likelihood Estimator of covariance." - }, - { - "name": "precision", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The precision matrix of the covariance model to be tested." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the sample mean of the log_likelihood under a covariance model\n\ncomputes the empirical expected log-likelihood (accounting for the\nnormalization terms and scaling), allowing for universal comparison (beyond\nthis software package)\n\nParameters\n----------\nemp_cov : ndarray of shape (n_features, n_features)\n Maximum Likelihood Estimator of covariance.\n\nprecision : ndarray of shape (n_features, n_features)\n The precision matrix of the covariance model to be tested.\n\nReturns\n-------\nlog_likelihood_ : float\n Sample mean of the log-likelihood." - }, - { - "name": "empirical_covariance", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data from which to compute the covariance estimate" - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data will not be centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False, data will be centered before computation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the Maximum likelihood covariance estimator\n\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data will be centered before computation.\n\nReturns\n-------\ncovariance : ndarray of shape (n_features, n_features)\n Empirical covariance (Maximum Likelihood Estimator).\n\nExamples\n--------\n>>> from sklearn.covariance import empirical_covariance\n>>> X = [[1,1,1],[1,1,1],[1,1,1],\n... 
[0,0,0],[0,0,0],[0,0,0]]\n>>> empirical_covariance(X)\narray([[0.25, 0.25, 0.25],\n [0.25, 0.25, 0.25],\n [0.25, 0.25, 0.25]])" - } - ] - }, - { - "name": "sklearn.covariance._graph_lasso", - "imports": [ - "from collections.abc import Sequence", - "import warnings", - "import operator", - "import sys", - "import time", - "import numpy as np", - "from scipy import linalg", - "from joblib import Parallel", - "from None import empirical_covariance", - "from None import EmpiricalCovariance", - "from None import log_likelihood", - "from exceptions import ConvergenceWarning", - "from utils.validation import check_random_state", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from linear_model import _cd_fast as cd_fast", - "from linear_model import lars_path_gram", - "from model_selection import check_cv", - "from model_selection import cross_val_score", - "from utils.deprecation import deprecated" - ], - "classes": [ - { - "name": "GraphicalLasso", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The regularization parameter: the higher alpha, the more regularization, the sparser the inverse covariance. Range is (0, inf]." - }, - { - "name": "mode", - "type": "Literal['cd', 'lars']", - "hasDefault": true, - "default": "'cd'", - "limitation": null, - "ignored": false, - "docstring": "The Lasso solver to use: coordinate descent or LARS. Use LARS for very sparse underlying graphs, where p > n. Elsewhere prefer cd which is more numerically stable." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance to declare convergence: if the dual gap goes below this value, iterations are stopped. Range is (0, inf]." 
- }, - { - "name": "enet_tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the elastic net solver used to calculate the descent direction. This parameter controls the accuracy of the search direction for a given column update, not of the overall parameter estimate. Only used for mode='cd'. Range is (0, inf]." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If verbose is True, the objective function and dual gap are plotted at each iteration." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data are not centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False, data are centered before computation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data from which to compute the covariance estimate" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fits the GraphicalLasso model to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object" - } - ], - "docstring": "Sparse inverse covariance estimation with an l1-penalized estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n GraphLasso has been renamed to GraphicalLasso\n\nParameters\n----------\nalpha : float, default=0.01\n The regularization parameter: the higher alpha, the more\n regularization, the sparser the inverse covariance.\n Range is (0, inf].\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where p > n. Elsewhere prefer cd\n which is more numerically stable.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. Range is (0, inf].\n\nmax_iter : int, default=100\n The maximum number of iterations.\n\nverbose : bool, default=False\n If verbose is True, the objective function and dual gap are\n plotted at each iteration.\n\nassume_centered : bool, default=False\n If True, data are not centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. 
the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n\nn_iter_ : int\n Number of iterations run.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import GraphicalLasso\n>>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n... [0.0, 0.4, 0.0, 0.0],\n... [0.2, 0.0, 0.3, 0.1],\n... [0.0, 0.0, 0.1, 0.7]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n... cov=true_cov,\n... size=200)\n>>> cov = GraphicalLasso().fit(X)\n>>> np.around(cov.covariance_, decimals=3)\narray([[0.816, 0.049, 0.218, 0.019],\n [0.049, 0.364, 0.017, 0.034],\n [0.218, 0.017, 0.322, 0.093],\n [0.019, 0.034, 0.093, 0.69 ]])\n>>> np.around(cov.location_, decimals=3)\narray([0.073, 0.04 , 0.038, 0.143])\n\nSee Also\n--------\ngraphical_lasso, GraphicalLassoCV" - }, - { - "name": "GraphicalLassoCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alphas", - "type": "Union[ArrayLike, int]", - "hasDefault": true, - "default": "4", - "limitation": null, - "ignored": false, - "docstring": "If an integer is given, it fixes the number of points on the grids of alpha to be used. If a list is given, it gives the grid to be used. See the notes in the class docstring for more details. Range is (0, inf] when floats given." - }, - { - "name": "n_refinements", - "type": "int", - "hasDefault": true, - "default": "4", - "limitation": null, - "ignored": false, - "docstring": "The number of times the grid is refined. Not used if explicit values of alphas are passed. Range is [1, inf)." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. 
Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.20 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance to declare convergence: if the dual gap goes below this value, iterations are stopped. Range is (0, inf]." - }, - { - "name": "enet_tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the elastic net solver used to calculate the descent direction. This parameter controls the accuracy of the search direction for a given column update, not of the overall parameter estimate. Only used for mode='cd'. Range is (0, inf]." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations." - }, - { - "name": "mode", - "type": "Literal['cd', 'lars']", - "hasDefault": true, - "default": "'cd'", - "limitation": null, - "ignored": false, - "docstring": "The Lasso solver to use: coordinate descent or LARS. Use LARS for very sparse underlying graphs, where number of features is greater than number of samples. Elsewhere prefer cd which is more numerically stable." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. 
versionchanged:: v0.20 `n_jobs` default changed from 1 to None" - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If verbose is True, the objective function and duality gap are printed at each iteration." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data are not centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False, data are centered before computation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data from which to compute the covariance estimate" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fits the GraphicalLasso covariance model to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object" - }, - { - "name": "grid_scores_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "cv_alphas_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n GraphLassoCV has been renamed to GraphicalLassoCV\n\nParameters\n----------\nalphas : int or array-like of shape (n_alphas,), dtype=float, default=4\n If an integer is given, it fixes the number of points on the\n grids of alpha to be used. If a list is given, it gives the\n grid to be used. See the notes in the class docstring for\n more details. Range is (0, inf] when floats given.\n\nn_refinements : int, default=4\n The number of times the grid is refined. Not used if explicit\n values of alphas are passed. Range is [1, inf).\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. 
versionchanged:: 0.20\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. Range is (0, inf].\n\nmax_iter : int, default=100\n Maximum number of iterations.\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where number of features is greater\n than number of samples. Elsewhere prefer cd which is more numerically\n stable.\n\nn_jobs : int, default=None\n number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\nverbose : bool, default=False\n If verbose is True, the objective function and duality gap are\n printed at each iteration.\n\nassume_centered : bool, default=False\n If True, data are not centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated precision matrix (inverse covariance).\n\nalpha_ : float\n Penalization parameter selected.\n\ncv_alphas_ : list of shape (n_alphas,), dtype=float\n All penalization parameters explored.\n\n .. 
deprecated:: 0.24\n The `cv_alphas_` attribute is deprecated in version 0.24 in favor\n of `cv_results_['alphas']` and will be removed in version\n 1.1 (renaming of 0.26).\n\ngrid_scores_ : ndarray of shape (n_alphas, n_folds)\n Log-likelihood score on left-out data across folds.\n\n .. deprecated:: 0.24\n The `grid_scores_` attribute is deprecated in version 0.24 in favor\n of `cv_results_` and will be removed in version\n 1.1 (renaming of 0.26).\n\ncv_results_ : dict of ndarrays\n A dict with keys:\n\n alphas : ndarray of shape (n_alphas,)\n All penalization parameters explored.\n\n split(k)_score : ndarray of shape (n_alphas,)\n Log-likelihood score on left-out data across (k)th fold.\n\n mean_score : ndarray of shape (n_alphas,)\n Mean of scores over the folds.\n\n std_score : ndarray of shape (n_alphas,)\n Standard deviation of scores over the folds.\n\n .. versionadded:: 0.24\n\nn_iter_ : int\n Number of iterations run for the optimal alpha.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import GraphicalLassoCV\n>>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n... [0.0, 0.4, 0.0, 0.0],\n... [0.2, 0.0, 0.3, 0.1],\n... [0.0, 0.0, 0.1, 0.7]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n... cov=true_cov,\n... 
size=200)\n>>> cov = GraphicalLassoCV().fit(X)\n>>> np.around(cov.covariance_, decimals=3)\narray([[0.816, 0.051, 0.22 , 0.017],\n [0.051, 0.364, 0.018, 0.036],\n [0.22 , 0.018, 0.322, 0.094],\n [0.017, 0.036, 0.094, 0.69 ]])\n>>> np.around(cov.location_, decimals=3)\narray([0.073, 0.04 , 0.038, 0.143])\n\nSee Also\n--------\ngraphical_lasso, GraphicalLasso\n\nNotes\n-----\nThe search for the optimal penalization parameter (alpha) is done on an\niteratively refined grid: first the cross-validated scores on a grid are\ncomputed, then a new refined grid is centered around the maximum, and so\non.\n\nOne of the challenges which is faced here is that the solvers can\nfail to converge to a well-conditioned estimate. The corresponding\nvalues of alpha then come out as missing values, but the optimum may\nbe close to these missing values." - } - ], - "functions": [ - { - "name": "_objective", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluation of the graphical-lasso objective function\n\nthe objective function is made of a shifted scaled version of the\nnormalized log-likelihood (i.e. its empirical mean over the samples) and a\npenalisation term to promote sparsity" - }, - { - "name": "_dual_gap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Expression of the dual gap convergence criterion\n\nThe specific definition is given in Duchi \"Projected Subgradient Methods\nfor Learning Sparse Gaussians\"." - }, - { - "name": "alpha_max", - "decorators": [], - "parameters": [ - { - "name": "emp_cov", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sample covariance matrix." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the maximum alpha for which there are some non-zeros off-diagonal.\n\nParameters\n----------\nemp_cov : ndarray of shape (n_features, n_features)\n The sample covariance matrix.\n\nNotes\n-----\nThis results from the bound for the all the Lasso that are solved\nin GraphicalLasso: each time, the row of cov corresponds to Xy. As the\nbound for alpha is given by `max(abs(Xy))`, the result follows." - }, - { - "name": "graphical_lasso", - "decorators": [], - "parameters": [ - { - "name": "emp_cov", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Empirical covariance from which to compute the covariance estimate." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The regularization parameter: the higher alpha, the more regularization, the sparser the inverse covariance. Range is (0, inf]." - }, - { - "name": "cov_init", - "type": "Array", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial guess for the covariance. If None, then the empirical covariance is used." - }, - { - "name": "mode", - "type": "Literal['cd', 'lars']", - "hasDefault": true, - "default": "'cd'", - "limitation": null, - "ignored": false, - "docstring": "The Lasso solver to use: coordinate descent or LARS. Use LARS for very sparse underlying graphs, where p > n. Elsewhere prefer cd which is more numerically stable." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance to declare convergence: if the dual gap goes below this value, iterations are stopped. Range is (0, inf]." 
- }, - { - "name": "enet_tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the elastic net solver used to calculate the descent direction. This parameter controls the accuracy of the search direction for a given column update, not of the overall parameter estimate. Only used for mode='cd'. Range is (0, inf]." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If verbose is True, the objective function and dual gap are printed at each iteration." - }, - { - "name": "return_costs", - "type": "bool", - "hasDefault": true, - "default": "Flase", - "limitation": null, - "ignored": false, - "docstring": "If return_costs is True, the objective function and dual gap at each iteration are returned." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "eps", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Default is `np.finfo(np.float64).eps`." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "l1-penalized covariance estimator\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionchanged:: v0.20\n graph_lasso has been renamed to graphical_lasso\n\nParameters\n----------\nemp_cov : ndarray of shape (n_features, n_features)\n Empirical covariance from which to compute the covariance estimate.\n\nalpha : float\n The regularization parameter: the higher alpha, the more\n regularization, the sparser the inverse covariance.\n Range is (0, inf].\n\ncov_init : array of shape (n_features, n_features), default=None\n The initial guess for the covariance. If None, then the empirical\n covariance is used.\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where p > n. Elsewhere prefer cd\n which is more numerically stable.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. Range is (0, inf].\n\nmax_iter : int, default=100\n The maximum number of iterations.\n\nverbose : bool, default=False\n If verbose is True, the objective function and dual gap are\n printed at each iteration.\n\nreturn_costs : bool, default=Flase\n If return_costs is True, the objective function and dual gap\n at each iteration are returned.\n\neps : float, default=eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. 
Default is `np.finfo(np.float64).eps`.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\ncovariance : ndarray of shape (n_features, n_features)\n The estimated covariance matrix.\n\nprecision : ndarray of shape (n_features, n_features)\n The estimated (sparse) precision matrix.\n\ncosts : list of (objective, dual_gap) pairs\n The list of values of the objective function and the dual gap at\n each iteration. Returned only if return_costs is True.\n\nn_iter : int\n Number of iterations. Returned only if `return_n_iter` is set to True.\n\nSee Also\n--------\nGraphicalLasso, GraphicalLassoCV\n\nNotes\n-----\nThe algorithm employed to solve this problem is the GLasso algorithm,\nfrom the Friedman 2008 Biostatistics paper. It is the same algorithm\nas in the R `glasso` package.\n\nOne possible difference with the `glasso` R package is that the\ndiagonal coefficients are not penalized." - }, - { - "name": "graphical_lasso_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data from which to compute the covariance estimate." - }, - { - "name": "alphas", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The list of regularization parameters, decreasing order." - }, - { - "name": "cov_init", - "type": "Array", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial guess for the covariance." - }, - { - "name": "X_test", - "type": "Array", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Optional test matrix to measure generalisation error." 
- }, - { - "name": "mode", - "type": "Literal['cd', 'lars']", - "hasDefault": true, - "default": "'cd'", - "limitation": null, - "ignored": false, - "docstring": "The Lasso solver to use: coordinate descent or LARS. Use LARS for very sparse underlying graphs, where p > n. Elsewhere prefer cd which is more numerically stable." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance to declare convergence: if the dual gap goes below this value, iterations are stopped. The tolerance must be a positive number." - }, - { - "name": "enet_tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the elastic net solver used to calculate the descent direction. This parameter controls the accuracy of the search direction for a given column update, not of the overall parameter estimate. Only used for mode='cd'. The tolerance must be a positive number." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations. This parameter should be a strictly positive integer." - }, - { - "name": "verbose", - "type": "Union[bool, int]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "The higher the verbosity flag, the more information is printed during the fitting." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "l1-penalized covariance estimator along a path of decreasing alphas\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data from which to compute the covariance estimate.\n\nalphas : array-like of shape (n_alphas,)\n The list of regularization parameters, decreasing order.\n\ncov_init : array of shape (n_features, n_features), default=None\n The initial guess for the covariance.\n\nX_test : array of shape (n_test_samples, n_features), default=None\n Optional test matrix to measure generalisation error.\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where p > n. Elsewhere prefer cd\n which is more numerically stable.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. The tolerance must be a positive\n number.\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. The tolerance must be a positive number.\n\nmax_iter : int, default=100\n The maximum number of iterations. 
This parameter should be a strictly\n positive integer.\n\nverbose : int or bool, default=False\n The higher the verbosity flag, the more information is printed\n during the fitting.\n\nReturns\n-------\ncovariances_ : list of shape (n_alphas,) of ndarray of shape (n_features, n_features)\n The estimated covariance matrices.\n\nprecisions_ : list of shape (n_alphas,) of ndarray of shape (n_features, n_features)\n The estimated (sparse) precision matrices.\n\nscores_ : list of shape (n_alphas,), dtype=float\n The generalisation error (log-likelihood) on the test data.\n Returned only if test data is passed." - } - ] - }, - { - "name": "sklearn.covariance._robust_covariance", - "imports": [ - "import warnings", - "import numbers", - "import numpy as np", - "from scipy import linalg", - "from scipy.stats import chi2", - "from None import empirical_covariance", - "from None import EmpiricalCovariance", - "from utils.extmath import fast_logdet", - "from utils import check_random_state", - "from utils import check_array", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "MinCovDet", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "store_precision", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specify if the estimated precision is stored." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the support of the robust location and the covariance estimates is computed, and a covariance estimate is recomputed from it, without centering the data. Useful to work with data whose mean is significantly equal to zero but is not exactly zero. If False, the robust location and covariance are directly computed with the FastMCD algorithm without additional treatment." 
- }, - { - "name": "support_fraction", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The proportion of points to be included in the support of the raw MCD estimate. Default is None, which implies that the minimum value of support_fraction will be used within the algorithm: `(n_sample + n_features + 1) / 2`. The parameter must be in the range (0, 1)." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the pseudo random number generator for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where `n_samples` is the number of samples and `n_features` is the number of features." - }, - { - "name": "y: Ignored", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Not used, present for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fits a Minimum Covariance Determinant with the FastMCD algorithm.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\n\ny: Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object" - }, - { - "name": "correct_covariance", - "decorators": [], - "parameters": [ - { - "name": "data", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix, with p features and n samples. The data set must be the one which was used to compute the raw estimates." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply a correction to raw Minimum Covariance Determinant estimates.\n\nCorrection using the empirical correction factor suggested\nby Rousseeuw and Van Driessen in [RVD]_.\n\nParameters\n----------\ndata : array-like of shape (n_samples, n_features)\n The data matrix, with p features and n samples.\n The data set must be the one which was used to compute\n the raw estimates.\n\nReturns\n-------\ncovariance_corrected : ndarray of shape (n_features, n_features)\n Corrected robust covariance estimate.\n\nReferences\n----------\n\n.. [RVD] A Fast Algorithm for the Minimum Covariance\n Determinant Estimator, 1999, American Statistical Association\n and the American Society for Quality, TECHNOMETRICS" - }, - { - "name": "reweight_covariance", - "decorators": [], - "parameters": [ - { - "name": "data", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix, with p features and n samples. The data set must be the one which was used to compute the raw estimates." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Re-weight raw Minimum Covariance Determinant estimates.\n\nRe-weight observations using Rousseeuw's method (equivalent to\ndeleting outlying observations from the data set before\ncomputing location and covariance estimates) described\nin [RVDriessen]_.\n\nParameters\n----------\ndata : array-like of shape (n_samples, n_features)\n The data matrix, with p features and n samples.\n The data set must be the one which was used to compute\n the raw estimates.\n\nReturns\n-------\nlocation_reweighted : ndarray of shape (n_features,)\n Re-weighted robust location estimate.\n\ncovariance_reweighted : ndarray of shape (n_features, n_features)\n Re-weighted robust covariance estimate.\n\nsupport_reweighted : ndarray of shape (n_samples,), dtype=bool\n A mask of the observations that have been used to compute\n the re-weighted robust location and covariance estimates.\n\nReferences\n----------\n\n.. [RVDriessen] A Fast Algorithm for the Minimum Covariance\n Determinant Estimator, 1999, American Statistical Association\n and the American Society for Quality, TECHNOMETRICS" - } - ], - "docstring": "Minimum Covariance Determinant (MCD): robust estimator of covariance.\n\nThe Minimum Covariance Determinant covariance estimator is to be applied\non Gaussian-distributed data, but could still be relevant on data\ndrawn from a unimodal, symmetric distribution. 
It is not meant to be used\nwith multi-modal data (the algorithm used to fit a MinCovDet object is\nlikely to fail in such a case).\nOne should consider projection pursuit methods to deal with multi-modal\ndatasets.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, the support of the robust location and the covariance\n estimates is computed, and a covariance estimate is recomputed from\n it, without centering the data.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, the robust location and covariance are directly computed\n with the FastMCD algorithm without additional treatment.\n\nsupport_fraction : float, default=None\n The proportion of points to be included in the support of the raw\n MCD estimate. Default is None, which implies that the minimum\n value of support_fraction will be used within the algorithm:\n `(n_sample + n_features + 1) / 2`. 
The parameter must be in the range\n (0, 1).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nAttributes\n----------\nraw_location_ : ndarray of shape (n_features,)\n The raw robust estimated location before correction and re-weighting.\n\nraw_covariance_ : ndarray of shape (n_features, n_features)\n The raw robust estimated covariance before correction and re-weighting.\n\nraw_support_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute\n the raw robust estimates of location and shape, before correction\n and re-weighting.\n\nlocation_ : ndarray of shape (n_features,)\n Estimated robust location.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated robust covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nsupport_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute\n the robust estimates of location and shape.\n\ndist_ : ndarray of shape (n_samples,)\n Mahalanobis distances of the training set (on which :meth:`fit` is\n called) observations.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import MinCovDet\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... size=500)\n>>> cov = MinCovDet(random_state=0).fit(X)\n>>> cov.covariance_\narray([[0.7411..., 0.2535...],\n [0.2535..., 0.3053...]])\n>>> cov.location_\narray([0.0813... , 0.0427...])\n\nReferences\n----------\n\n.. [Rouseeuw1984] P. J. Rousseeuw. Least median of squares regression.\n J. Am Stat Ass, 79:871, 1984.\n.. 
[Rousseeuw] A Fast Algorithm for the Minimum Covariance Determinant\n Estimator, 1999, American Statistical Association and the American\n Society for Quality, TECHNOMETRICS\n.. [ButlerDavies] R. W. Butler, P. L. Davies and M. Jhun,\n Asymptotics For The Minimum Covariance Determinant Estimator,\n The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400" - } - ], - "functions": [ - { - "name": "c_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data set in which we look for the n_support observations whose scatter matrix has minimum determinant." - }, - { - "name": "n_support", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of observations to compute the robust estimates of location and covariance from. This parameter must be greater than `n_samples / 2`." - }, - { - "name": "remaining_iterations", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations to perform. According to [Rouseeuw1999]_, two iterations are sufficient to get close to the minimum, and we never need more than 30 to reach convergence." - }, - { - "name": "initial_estimates", - "type": "Tuple[]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial estimates of location and shape from which to run the c_step procedure: - initial_estimates[0]: an initial location estimate - initial_estimates[1]: an initial covariance estimate" - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbose mode." 
- }, - { - "name": "cov_computation_method", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The function which will be used to compute the covariance. Must return array of shape (n_features, n_features)." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the pseudo random number generator for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "C_step procedure described in [Rouseeuw1984]_ aiming at computing MCD.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data set in which we look for the n_support observations whose\n scatter matrix has minimum determinant.\n\nn_support : int\n Number of observations to compute the robust estimates of location\n and covariance from. 
This parameter must be greater than\n `n_samples / 2`.\n\nremaining_iterations : int, default=30\n Number of iterations to perform.\n According to [Rouseeuw1999]_, two iterations are sufficient to get\n close to the minimum, and we never need more than 30 to reach\n convergence.\n\ninitial_estimates : tuple of shape (2,), default=None\n Initial estimates of location and shape from which to run the c_step\n procedure:\n - initial_estimates[0]: an initial location estimate\n - initial_estimates[1]: an initial covariance estimate\n\nverbose : bool, default=False\n Verbose mode.\n\ncov_computation_method : callable, default=:func:`sklearn.covariance.empirical_covariance`\n The function which will be used to compute the covariance.\n Must return array of shape (n_features, n_features).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nReturns\n-------\nlocation : ndarray of shape (n_features,)\n Robust location estimates.\n\ncovariance : ndarray of shape (n_features, n_features)\n Robust covariance estimates.\n\nsupport : ndarray of shape (n_samples,)\n A mask for the `n_support` observations whose scatter matrix has\n minimum determinant.\n\nReferences\n----------\n.. [Rouseeuw1999] A Fast Algorithm for the Minimum Covariance Determinant\n Estimator, 1999, American Statistical Association and the American\n Society for Quality, TECHNOMETRICS" - }, - { - "name": "_c_step", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "select_candidates", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data (sub)set in which we look for the n_support purest observations." 
- }, - { - "name": "n_support", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of samples the pure data set must contain. This parameter must be in the range `[(n + p + 1)/2] < n_support < n`." - }, - { - "name": "n_trials", - "type": "Union[Tuple[], int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of different initial sets of observations from which to run the algorithm. This parameter should be a strictly positive integer. Instead of giving a number of trials to perform, one can provide a list of initial estimates that will be used to iteratively run c_step procedures. In this case: - n_trials[0]: array-like, shape (n_trials, n_features) is the list of `n_trials` initial location estimates - n_trials[1]: array-like, shape (n_trials, n_features, n_features) is the list of `n_trials` initial covariances estimates" - }, - { - "name": "select", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Number of best candidates results to return. This parameter must be a strictly positive integer." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for the c_step procedure. (2 is enough to be close to the final solution. \"Never\" exceeds 20). This parameter must be a strictly positive integer." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Control the output verbosity." - }, - { - "name": "cov_computation_method", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The function which will be used to compute the covariance. 
Must return an array of shape (n_features, n_features)." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the pseudo random number generator for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Finds the best pure subset of observations to compute MCD from it.\n\nThe purpose of this function is to find the best sets of n_support\nobservations with respect to a minimization of their covariance\nmatrix determinant. Equivalently, it removes n_samples-n_support\nobservations to construct what we call a pure data set (i.e. not\ncontaining outliers). The list of the observations of the pure\ndata set is referred to as the `support`.\n\nStarting from a random support, the pure data set is found by the\nc_step procedure introduced by Rousseeuw and Van Driessen in\n[RV]_.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data (sub)set in which we look for the n_support purest observations.\n\nn_support : int\n The number of samples the pure data set must contain.\n This parameter must be in the range `[(n + p + 1)/2] < n_support < n`.\n\nn_trials : int or tuple of shape (2,)\n Number of different initial sets of observations from which to\n run the algorithm. This parameter should be a strictly positive\n integer.\n Instead of giving a number of trials to perform, one can provide a\n list of initial estimates that will be used to iteratively run\n c_step procedures. In this case:\n - n_trials[0]: array-like, shape (n_trials, n_features)\n is the list of `n_trials` initial location estimates\n - n_trials[1]: array-like, shape (n_trials, n_features, n_features)\n is the list of `n_trials` initial covariances estimates\n\nselect : int, default=1\n Number of best candidates results to return. 
This parameter must be\n a strictly positive integer.\n\nn_iter : int, default=30\n Maximum number of iterations for the c_step procedure.\n (2 is enough to be close to the final solution. \"Never\" exceeds 20).\n This parameter must be a strictly positive integer.\n\nverbose : bool, default=False\n Control the output verbosity.\n\ncov_computation_method : callable, default=:func:`sklearn.covariance.empirical_covariance`\n The function which will be used to compute the covariance.\n Must return an array of shape (n_features, n_features).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nSee Also\n---------\nc_step\n\nReturns\n-------\nbest_locations : ndarray of shape (select, n_features)\n The `select` location estimates computed from the `select` best\n supports found in the data set (`X`).\n\nbest_covariances : ndarray of shape (select, n_features, n_features)\n The `select` covariance estimates computed from the `select`\n best supports found in the data set (`X`).\n\nbest_supports : ndarray of shape (select, n_samples)\n The `select` best supports found in the data set (`X`).\n\nReferences\n----------\n.. [RV] A Fast Algorithm for the Minimum Covariance Determinant\n Estimator, 1999, American Statistical Association and the American\n Society for Quality, TECHNOMETRICS" - }, - { - "name": "fast_mcd", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix, with p features and n samples." - }, - { - "name": "support_fraction", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The proportion of points to be included in the support of the raw MCD estimate. 
Default is `None`, which implies that the minimum value of `support_fraction` will be used within the algorithm: `(n_sample + n_features + 1) / 2`. This parameter must be in the range (0, 1)." - }, - { - "name": "cov_computation_method", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The function which will be used to compute the covariance. Must return an array of shape (n_features, n_features)." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the pseudo random number generator for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimates the Minimum Covariance Determinant matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix, with p features and n samples.\n\nsupport_fraction : float, default=None\n The proportion of points to be included in the support of the raw\n MCD estimate. Default is `None`, which implies that the minimum\n value of `support_fraction` will be used within the algorithm:\n `(n_sample + n_features + 1) / 2`. 
This parameter must be in the\n range (0, 1).\n\ncov_computation_method : callable, default=:func:`sklearn.covariance.empirical_covariance`\n The function which will be used to compute the covariance.\n Must return an array of shape (n_features, n_features).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nReturns\n-------\nlocation : ndarray of shape (n_features,)\n Robust location of the data.\n\ncovariance : ndarray of shape (n_features, n_features)\n Robust covariance of the features.\n\nsupport : ndarray of shape (n_samples,), dtype=bool\n A mask of the observations that have been used to compute\n the robust location and covariance estimates of the data set.\n\nNotes\n-----\nThe FastMCD algorithm has been introduced by Rousseuw and Van Driessen\nin \"A Fast Algorithm for the Minimum Covariance Determinant Estimator,\n1999, American Statistical Association and the American Society\nfor Quality, TECHNOMETRICS\".\nThe principle is to compute robust estimates and random subsets before\npooling them into a larger subsets, and finally into the full data set.\nDepending on the size of the initial sample, we have one, two or three\nsuch computation levels.\n\nNote that only raw estimates are returned. If one is interested in\nthe correction and reweighting steps described in [RouseeuwVan]_,\nsee the MinCovDet object.\n\nReferences\n----------\n\n.. [RouseeuwVan] A Fast Algorithm for the Minimum Covariance\n Determinant Estimator, 1999, American Statistical Association\n and the American Society for Quality, TECHNOMETRICS\n\n.. [Butler1993] R. W. Butler, P. L. Davies and M. Jhun,\n Asymptotics For The Minimum Covariance Determinant Estimator,\n The Annals of Statistics, 1993, Vol. 21, No. 
3, 1385-1400" - } - ] - }, - { - "name": "sklearn.covariance._shrunk_covariance", - "imports": [ - "import warnings", - "import numpy as np", - "from None import empirical_covariance", - "from None import EmpiricalCovariance", - "from utils import check_array", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "ShrunkCovariance", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "store_precision", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specify if the estimated precision is stored" - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data will not be centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False, data will be centered before computation." - }, - { - "name": "shrinkage", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Coefficient in the convex combination used for the computation of the shrunk estimate. Range is [0, 1]." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y: Ignored", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Not used, present for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the shrunk covariance model according to the given training data\nand parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny: Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object" - } - ], - "docstring": "Covariance estimator with shrinkage\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data will be centered before computation.\n\nshrinkage : float, default=0.1\n Coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\nAttributes\n----------\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix\n\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import ShrunkCovariance\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... 
size=500)\n>>> cov = ShrunkCovariance().fit(X)\n>>> cov.covariance_\narray([[0.7387..., 0.2536...],\n [0.2536..., 0.4110...]])\n>>> cov.location_\narray([0.0622..., 0.0193...])\n\nNotes\n-----\nThe regularized covariance is given by:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features" - }, - { - "name": "LedoitWolf", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "store_precision", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specify if the estimated precision is stored." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data will not be centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False (default), data will be centered before computation." - }, - { - "name": "block_size", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Size of blocks into which the covariance matrix will be split during its Ledoit-Wolf estimation. This is purely a memory optimization and does not affect results." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where `n_samples` is the number of samples and `n_features` is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the Ledoit-Wolf shrunk covariance model according to the given\ntraining data and parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object" - } - ], - "docstring": "LedoitWolf Estimator\n\nLedoit-Wolf is a particular form of shrinkage, where the shrinkage\ncoefficient is computed using O. Ledoit and M. Wolf's formula as\ndescribed in \"A Well-Conditioned Estimator for Large-Dimensional\nCovariance Matrices\", Ledoit and Wolf, Journal of Multivariate\nAnalysis, Volume 88, Issue 2, February 2004, pages 365-411.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False (default), data will be centered before computation.\n\nblock_size : int, default=1000\n Size of blocks into which the covariance matrix will be split\n during its Ledoit-Wolf estimation. This is purely a memory\n optimization and does not affect results.\n\nAttributes\n----------\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.\n\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nshrinkage_ : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate. 
Range is [0, 1].\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import LedoitWolf\n>>> real_cov = np.array([[.4, .2],\n... [.2, .8]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... size=50)\n>>> cov = LedoitWolf().fit(X)\n>>> cov.covariance_\narray([[0.4406..., 0.1616...],\n [0.1616..., 0.8022...]])\n>>> cov.location_\narray([ 0.0595... , -0.0075...])\n\nNotes\n-----\nThe regularised covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features\nand shrinkage is given by the Ledoit and Wolf formula (see References)\n\nReferences\n----------\n\"A Well-Conditioned Estimator for Large-Dimensional Covariance Matrices\",\nLedoit and Wolf, Journal of Multivariate Analysis, Volume 88, Issue 2,\nFebruary 2004, pages 365-411." - }, - { - "name": "OAS", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "store_precision", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specify if the estimated precision is stored." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data will not be centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False (default), data will be centered before computation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where `n_samples` is the number of samples and `n_features` is the number of features." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the Oracle Approximating Shrinkage covariance model\naccording to the given training data and parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object" - } - ], - "docstring": "Oracle Approximating Shrinkage Estimator\n\nRead more in the :ref:`User Guide `.\n\nOAS is a particular form of shrinkage described in\n\"Shrinkage Algorithms for MMSE Covariance Estimation\"\nChen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.\n\nThe formula used here does not correspond to the one given in the\narticle. In the original article, formula (23) states that 2/p is\nmultiplied by Trace(cov*cov) in both the numerator and denominator, but\nthis operation is omitted because for a large p, the value of 2/p is\nso small that it doesn't affect the value of the estimator.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False (default), data will be centered before computation.\n\nAttributes\n----------\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.\n\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. 
the estimated mean.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nshrinkage_ : float\n coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import OAS\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... size=500)\n>>> oas = OAS().fit(X)\n>>> oas.covariance_\narray([[0.7533..., 0.2763...],\n [0.2763..., 0.3964...]])\n>>> oas.precision_\narray([[ 1.7833..., -1.2431... ],\n [-1.2431..., 3.3889...]])\n>>> oas.shrinkage_\n0.0195...\n\nNotes\n-----\nThe regularised covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features\nand shrinkage is given by the OAS formula (see References)\n\nReferences\n----------\n\"Shrinkage Algorithms for MMSE Covariance Estimation\"\nChen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010." - } - ], - "functions": [ - { - "name": "shrunk_covariance", - "decorators": [], - "parameters": [ - { - "name": "emp_cov", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Covariance matrix to be shrunk" - }, - { - "name": "shrinkage", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Coefficient in the convex combination used for the computation of the shrunk estimate. Range is [0, 1]." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculates a covariance matrix shrunk on the diagonal\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nemp_cov : array-like of shape (n_features, n_features)\n Covariance matrix to be shrunk\n\nshrinkage : float, default=0.1\n Coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\nReturns\n-------\nshrunk_cov : ndarray of shape (n_features, n_features)\n Shrunk covariance.\n\nNotes\n-----\nThe regularized (shrunk) covariance is given by:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features" - }, - { - "name": "ledoit_wolf_shrinkage", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data from which to compute the Ledoit-Wolf shrunk covariance shrinkage." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data will not be centered before computation. Useful to work with data whose mean is significantly equal to zero but is not exactly zero. If False, data will be centered before computation." - }, - { - "name": "block_size", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Size of blocks into which the covariance matrix will be split." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimates the shrunk Ledoit-Wolf covariance matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the Ledoit-Wolf shrunk covariance shrinkage.\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, data will be centered before computation.\n\nblock_size : int, default=1000\n Size of blocks into which the covariance matrix will be split.\n\nReturns\n-------\nshrinkage : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate.\n\nNotes\n-----\nThe regularized (shrunk) covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features" - }, - { - "name": "ledoit_wolf", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data from which to compute the covariance estimate" - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data will not be centered before computation. Useful to work with data whose mean is significantly equal to zero but is not exactly zero. If False, data will be centered before computation." - }, - { - "name": "block_size", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Size of blocks into which the covariance matrix will be split. This is purely a memory optimization and does not affect results." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimates the shrunk Ledoit-Wolf covariance matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, data will be centered before computation.\n\nblock_size : int, default=1000\n Size of blocks into which the covariance matrix will be split.\n This is purely a memory optimization and does not affect results.\n\nReturns\n-------\nshrunk_cov : ndarray of shape (n_features, n_features)\n Shrunk covariance.\n\nshrinkage : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate.\n\nNotes\n-----\nThe regularized (shrunk) covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features" - }, - { - "name": "oas", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data from which to compute the covariance estimate." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data will not be centered before computation. Useful to work with data whose mean is significantly equal to zero but is not exactly zero. If False, data will be centered before computation." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate covariance with the Oracle Approximating Shrinkage algorithm.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate.\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, data will be centered before computation.\n\nReturns\n-------\nshrunk_cov : array-like of shape (n_features, n_features)\n Shrunk covariance.\n\nshrinkage : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate.\n\nNotes\n-----\nThe regularised (shrunk) covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features\n\nThe formula we used to implement the OAS is slightly modified compared\nto the one given in the article. See :class:`OAS` for more details." 
- } - ] - }, - { - "name": "sklearn.covariance", - "imports": [ - "from _empirical_covariance import empirical_covariance", - "from _empirical_covariance import EmpiricalCovariance", - "from _empirical_covariance import log_likelihood", - "from _shrunk_covariance import shrunk_covariance", - "from _shrunk_covariance import ShrunkCovariance", - "from _shrunk_covariance import ledoit_wolf", - "from _shrunk_covariance import ledoit_wolf_shrinkage", - "from _shrunk_covariance import LedoitWolf", - "from _shrunk_covariance import oas", - "from _shrunk_covariance import OAS", - "from _robust_covariance import fast_mcd", - "from _robust_covariance import MinCovDet", - "from _graph_lasso import graphical_lasso", - "from _graph_lasso import GraphicalLasso", - "from _graph_lasso import GraphicalLassoCV", - "from _elliptic_envelope import EllipticEnvelope" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.covariance.tests.test_covariance", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn import datasets", - "from sklearn.covariance import empirical_covariance", - "from sklearn.covariance import EmpiricalCovariance", - "from sklearn.covariance import ShrunkCovariance", - "from sklearn.covariance import shrunk_covariance", - "from sklearn.covariance import LedoitWolf", - "from sklearn.covariance import ledoit_wolf", - "from sklearn.covariance import ledoit_wolf_shrinkage", - "from sklearn.covariance import OAS", - "from sklearn.covariance import oas" - ], - "classes": [], - "functions": [ - { - "name": "test_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shrunk_covariance", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ledoit_wolf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_naive_ledoit_wolf_shrinkage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ledoit_wolf_small", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ledoit_wolf_large", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.covariance.tests.test_elliptic_envelope", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.covariance import EllipticEnvelope", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.exceptions import NotFittedError" - ], - "classes": [], - "functions": [ - { - "name": "test_elliptic_envelope", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.covariance.tests.test_graphical_lasso", - "imports": [ - "import sys", - "import pytest", - "import numpy as np", - "from scipy import linalg", - "from numpy.testing import assert_allclose", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_less", - "from sklearn.covariance import 
graphical_lasso", - "from sklearn.covariance import GraphicalLasso", - "from sklearn.covariance import GraphicalLassoCV", - "from sklearn.covariance import empirical_covariance", - "from sklearn.datasets import make_sparse_spd_matrix", - "from io import StringIO", - "from sklearn.utils import check_random_state", - "from sklearn import datasets" - ], - "classes": [], - "functions": [ - { - "name": "test_graphical_lasso", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_graphical_lasso_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_graph_lasso_2D", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_graphical_lasso_iris_singular", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_graphical_lasso_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_graphical_lasso_cv_grid_scores_and_cv_alphas_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_graphical_lasso_cv_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.covariance.tests.test_robust_covariance", - "imports": [ - "import itertools", - "import numpy as np", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn import datasets", - "from sklearn.covariance import empirical_covariance", - "from sklearn.covariance import MinCovDet", - "from 
sklearn.covariance import fast_mcd" - ], - "classes": [], - "functions": [ - { - "name": "test_mcd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fast_mcd_on_invalid_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mcd_class_on_invalid_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "launch_mcd_on_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mcd_issue1127", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mcd_issue3367", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mcd_support_covariance_is_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mcd_increasing_det_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.covariance.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.cross_decomposition._pls", - "imports": [ - "import warnings", - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numpy as np", - "from scipy.linalg import pinv2", - "from scipy.linalg import svd", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from base import TransformerMixin", - "from base import MultiOutputMixin", - "from utils import check_array", - "from utils import check_consistent_length", - "from utils.extmath import svd_flip", - "from utils.validation import check_is_fitted", - 
"from utils.validation import FLOAT_DTYPES", - "from utils.validation import _deprecate_positional_args", - "from exceptions import ConvergenceWarning", - "from utils.deprecation import deprecated" - ], - "classes": [ - { - "name": "_PLS", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where `n_samples` is the number of samples and `n_features` is the number of predictors." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vectors, where `n_samples` is the number of samples and `n_targets` is the number of response variables." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit model to data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of predictors.\n\nY : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target vectors, where `n_samples` is the number of samples and\n `n_targets` is the number of response variables." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples to transform." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vectors." 
- }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy `X` and `Y`, or perform in-place normalization." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Apply the dimension reduction.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Samples to transform.\n\nY : array-like of shape (n_samples, n_targets), default=None\n Target vectors.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y`, or perform in-place normalization.\n\nReturns\n-------\n`x_scores` if `Y` is not given, `(x_scores, y_scores)` otherwise." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data, where `n_samples` is the number of samples and `n_components` is the number of pls components." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform data back to its original space.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_components)\n New data, where `n_samples` is the number of samples\n and `n_components` is the number of pls components.\n\nReturns\n-------\nx_reconstructed : array-like of shape (n_samples, n_features)\n\nNotes\n-----\nThis transformation will only be exact if `n_components=n_features`." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy `X` and `Y`, or perform in-place normalization." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict targets of given samples.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Samples.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y`, or perform in-place normalization.\n\nNotes\n-----\nThis call requires the estimation of a matrix of shape\n`(n_features, n_targets)`, which may be an issue in high dimensional\nspace." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of predictors." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vectors, where n_samples is the number of samples and n_targets is the number of response variables." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn and apply the dimension reduction on the train data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of predictors.\n\ny : array-like of shape (n_samples, n_targets), default=None\n Target vectors, where n_samples is the number of samples and\n n_targets is the number of response variables.\n\nReturns\n-------\nx_scores if Y is not given, (x_scores, y_scores) otherwise." 
- }, - { - "name": "norm_y_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "x_mean_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "y_mean_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "x_std_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "y_std_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "x_scores_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "y_scores_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Partial Least Squares (PLS)\n\nThis class implements the generic PLS algorithm.\n\nMain ref: Wegelin, a survey of Partial Least Squares (PLS) methods,\nwith emphasis on the two-block case\nhttps://www.stat.washington.edu/research/reports/2000/tr371.pdf" - }, - { - "name": "PLSRegression", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of components to keep. Should be in `[1, min(n_samples, n_features, n_targets)]`." - }, - { - "name": "scale", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to scale `X` and `Y`." 
- }, - { - "name": "algorithm", - "type": "Literal['nipals', 'svd']", - "hasDefault": true, - "default": "'nipals'", - "limitation": null, - "ignored": false, - "docstring": "The algorithm used to estimate the first singular vectors of the cross-covariance matrix. 'nipals' uses the power method while 'svd' will compute the whole SVD." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations of the power method when `algorithm='nipals'`. Ignored otherwise." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-06", - "limitation": null, - "ignored": false, - "docstring": "The tolerance used as convergence criteria in the power method: the algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less than `tol`, where `u` corresponds to the left singular vector." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy `X` and `Y` in fit before applying centering, and potentially scaling. If False, these operations will be done inplace, modifying both arrays." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "PLS regression\n\nPLSRegression is also known as PLS2 or PLS1, depending on the number of\ntargets.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.8\n\nParameters\n----------\nn_components : int, default=2\n Number of components to keep. Should be in `[1, min(n_samples,\n n_features, n_targets)]`.\n\nscale : bool, default=True\n Whether to scale `X` and `Y`.\n\nalgorithm : {'nipals', 'svd'}, default='nipals'\n The algorithm used to estimate the first singular vectors of the\n cross-covariance matrix. 
'nipals' uses the power method while 'svd'\n will compute the whole SVD.\n\nmax_iter : int, default=500\n The maximum number of iterations of the power method when\n `algorithm='nipals'`. Ignored otherwise.\n\ntol : float, default=1e-06\n The tolerance used as convergence criteria in the power method: the\n algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n than `tol`, where `u` corresponds to the left singular vector.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. If False, these operations will be done inplace,\n modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the cross-covariance matrices of each\n iteration.\n\ny_weights_ : ndarray of shape (n_targets, n_components)\n The right singular vectors of the cross-covariance matrices of each\n iteration.\n\nx_loadings_ : ndarray of shape (n_features, n_components)\n The loadings of `X`.\n\ny_loadings_ : ndarray of shape (n_targets, n_components)\n The loadings of `Y`.\n\nx_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\ny_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\nx_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `X`.\n\ny_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `Y`.\n\ncoef_ : ndarray of shape (n_features, n_targets)\n The coefficients of the linear model such that `Y` is approximated as\n `Y = X @ coef_`.\n\nn_iter_ : list of shape (n_components,)\n Number of iterations of the power method, for each\n component. 
Empty if `algorithm='svd'`.\n\nExamples\n--------\n>>> from sklearn.cross_decomposition import PLSRegression\n>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]\n>>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n>>> pls2 = PLSRegression(n_components=2)\n>>> pls2.fit(X, Y)\nPLSRegression()\n>>> Y_pred = pls2.predict(X)" - }, - { - "name": "PLSCanonical", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of components to keep. Should be in `[1, min(n_samples, n_features, n_targets)]`." - }, - { - "name": "scale", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to scale `X` and `Y`." - }, - { - "name": "algorithm", - "type": "Literal['nipals', 'svd']", - "hasDefault": true, - "default": "'nipals'", - "limitation": null, - "ignored": false, - "docstring": "The algorithm used to estimate the first singular vectors of the cross-covariance matrix. 'nipals' uses the power method while 'svd' will compute the whole SVD." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "the maximum number of iterations of the power method when `algorithm='nipals'`. Ignored otherwise." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-06", - "limitation": null, - "ignored": false, - "docstring": "The tolerance used as convergence criteria in the power method: the algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less than `tol`, where `u` corresponds to the left singular vector." 
- }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy `X` and `Y` in fit before applying centering, and potentially scaling. If False, these operations will be done inplace, modifying both arrays." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Partial Least Squares transformer and regressor.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.8\n\nParameters\n----------\nn_components : int, default=2\n Number of components to keep. Should be in `[1, min(n_samples,\n n_features, n_targets)]`.\n\nscale : bool, default=True\n Whether to scale `X` and `Y`.\n\nalgorithm : {'nipals', 'svd'}, default='nipals'\n The algorithm used to estimate the first singular vectors of the\n cross-covariance matrix. 'nipals' uses the power method while 'svd'\n will compute the whole SVD.\n\nmax_iter : int, default=500\n the maximum number of iterations of the power method when\n `algorithm='nipals'`. Ignored otherwise.\n\ntol : float, default=1e-06\n The tolerance used as convergence criteria in the power method: the\n algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n than `tol`, where `u` corresponds to the left singular vector.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. 
If False, these operations will be done inplace,\n modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the cross-covariance matrices of each\n iteration.\n\ny_weights_ : ndarray of shape (n_targets, n_components)\n The right singular vectors of the cross-covariance matrices of each\n iteration.\n\nx_loadings_ : ndarray of shape (n_features, n_components)\n The loadings of `X`.\n\ny_loadings_ : ndarray of shape (n_targets, n_components)\n The loadings of `Y`.\n\nx_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\n .. deprecated:: 0.24\n `x_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\ny_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\n .. deprecated:: 0.24\n `y_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\nx_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `X`.\n\ny_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `Y`.\n\ncoef_ : ndarray of shape (n_features, n_targets)\n The coefficients of the linear model such that `Y` is approximated as\n `Y = X @ coef_`.\n\nn_iter_ : list of shape (n_components,)\n Number of iterations of the power method, for each\n component. 
Empty if `algorithm='svd'`.\n\nExamples\n--------\n>>> from sklearn.cross_decomposition import PLSCanonical\n>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]\n>>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n>>> plsca = PLSCanonical(n_components=2)\n>>> plsca.fit(X, Y)\nPLSCanonical()\n>>> X_c, Y_c = plsca.transform(X, Y)\n\nSee Also\n--------\nCCA\nPLSSVD" - }, - { - "name": "CCA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of components to keep. Should be in `[1, min(n_samples, n_features, n_targets)]`." - }, - { - "name": "scale", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to scale `X` and `Y`." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "the maximum number of iterations of the power method." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-06", - "limitation": null, - "ignored": false, - "docstring": "The tolerance used as convergence criteria in the power method: the algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less than `tol`, where `u` corresponds to the left singular vector." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy `X` and `Y` in fit before applying centering, and potentially scaling. If False, these operations will be done inplace, modifying both arrays." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Canonical Correlation Analysis, also known as \"Mode B\" PLS.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=2\n Number of components to keep. Should be in `[1, min(n_samples,\n n_features, n_targets)]`.\n\nscale : bool, default=True\n Whether to scale `X` and `Y`.\n\nmax_iter : int, default=500\n the maximum number of iterations of the power method.\n\ntol : float, default=1e-06\n The tolerance used as convergence criteria in the power method: the\n algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n than `tol`, where `u` corresponds to the left singular vector.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. If False, these operations will be done inplace,\n modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the cross-covariance matrices of each\n iteration.\n\ny_weights_ : ndarray of shape (n_targets, n_components)\n The right singular vectors of the cross-covariance matrices of each\n iteration.\n\nx_loadings_ : ndarray of shape (n_features, n_components)\n The loadings of `X`.\n\ny_loadings_ : ndarray of shape (n_targets, n_components)\n The loadings of `Y`.\n\nx_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\n .. deprecated:: 0.24\n `x_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\ny_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\n .. deprecated:: 0.24\n `y_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). 
You can just call `transform` on the training\n data instead.\n\nx_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `X`.\n\ny_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `Y`.\n\ncoef_ : ndarray of shape (n_features, n_targets)\n The coefficients of the linear model such that `Y` is approximated as\n `Y = X @ coef_`.\n\nn_iter_ : list of shape (n_components,)\n Number of iterations of the power method, for each\n component.\n\nExamples\n--------\n>>> from sklearn.cross_decomposition import CCA\n>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [3.,5.,4.]]\n>>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n>>> cca = CCA(n_components=1)\n>>> cca.fit(X, Y)\nCCA(n_components=1)\n>>> X_c, Y_c = cca.transform(X, Y)\n\nSee Also\n--------\nPLSCanonical\nPLSSVD" - }, - { - "name": "PLSSVD", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of components to keep. Should be in `[1, min(n_samples, n_features, n_targets)]`." - }, - { - "name": "scale", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to scale `X` and `Y`." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy `X` and `Y` in fit before applying centering, and potentially scaling. If False, these operations will be done inplace, modifying both arrays." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training samples." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit model to data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training samples.\n\nY : array-like of shape (n_samples,) or (n_samples, n_targets)\n Targets." - }, - { - "name": "x_scores_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "y_scores_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "x_mean_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "y_mean_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "x_std_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "y_std_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples to be transformed." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the dimensionality reduction.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Samples to be transformed.\n\nY : array-like of shape (n_samples,) or (n_samples, n_targets), default=None\n Targets.\n\nReturns\n-------\nout : array-like or tuple of array-like\n The transformed data `X_tranformed` if `Y` is not None,\n `(X_transformed, Y_transformed)` otherwise." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Learn and apply the dimensionality reduction.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training samples.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets), default=None\n Targets.\n\nReturns\n-------\nout : array-like or tuple of array-like\n The transformed data `X_tranformed` if `Y` is not None,\n `(X_transformed, Y_transformed)` otherwise." - } - ], - "docstring": "Partial Least Square SVD.\n\nThis transformer simply performs a SVD on the crosscovariance matrix X'Y.\nIt is able to project both the training data `X` and the targets `Y`. The\ntraining data X is projected on the left singular vectors, while the\ntargets are projected on the right singular vectors.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.8\n\nParameters\n----------\nn_components : int, default=2\n The number of components to keep. 
Should be in `[1,\n min(n_samples, n_features, n_targets)]`.\n\nscale : bool, default=True\n Whether to scale `X` and `Y`.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. If False, these operations will be done inplace,\n modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the SVD of the cross-covariance matrix.\n Used to project `X` in `transform`.\n\ny_weights_ : ndarray of (n_targets, n_components)\n The right singular vectors of the SVD of the cross-covariance matrix.\n Used to project `X` in `transform`.\n\nx_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\n .. deprecated:: 0.24\n `x_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\ny_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\n .. deprecated:: 0.24\n `y_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.cross_decomposition import PLSSVD\n>>> X = np.array([[0., 0., 1.],\n... [1., 0., 0.],\n... [2., 2., 2.],\n... [2., 5., 4.]])\n>>> Y = np.array([[0.1, -0.2],\n... [0.9, 1.1],\n... [6.2, 5.9],\n... 
[11.9, 12.3]])\n>>> pls = PLSSVD(n_components=2).fit(X, Y)\n>>> X_c, Y_c = pls.transform(X, Y)\n>>> X_c.shape, Y_c.shape\n((4, 2), (4, 2))\n\nSee Also\n--------\nPLSCanonical\nCCA" - } - ], - "functions": [ - { - "name": "_get_first_singular_vectors_power_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the first left and right singular vectors of X'Y.\n\nProvides an alternative to the svd(X'Y) and uses the power method instead.\nWith norm_y_weights to True and in mode A, this corresponds to the\nalgorithm section 11.3 of the Wegelin's review, except this starts at the\n\"update saliences\" part." - }, - { - "name": "_get_first_singular_vectors_svd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the first left and right singular vectors of X'Y.\n\nHere the whole SVD is computed." - }, - { - "name": "_center_scale_xy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Center X, Y and scale if the scale parameter==True\n\nReturns\n-------\n X, Y, x_mean, y_mean, x_std, y_std" - }, - { - "name": "_svd_flip_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Same as svd_flip but works on 1d arrays, and is inplace" - } - ] - }, - { - "name": "sklearn.cross_decomposition", - "imports": [ - "from _pls import PLSCanonical", - "from _pls import PLSRegression", - "from _pls import PLSSVD", - "from _pls import CCA" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.cross_decomposition.tests.test_pls", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_allclose", - "from sklearn.datasets import load_linnerud", - "from sklearn.cross_decomposition._pls import 
_center_scale_xy", - "from sklearn.cross_decomposition._pls import _get_first_singular_vectors_power_method", - "from sklearn.cross_decomposition._pls import _get_first_singular_vectors_svd", - "from sklearn.cross_decomposition._pls import _svd_flip_1d", - "from sklearn.cross_decomposition import CCA", - "from sklearn.cross_decomposition import PLSSVD", - "from sklearn.cross_decomposition import PLSRegression", - "from sklearn.cross_decomposition import PLSCanonical", - "from sklearn.datasets import make_regression", - "from sklearn.utils import check_random_state", - "from sklearn.utils.extmath import svd_flip", - "from sklearn.exceptions import ConvergenceWarning" - ], - "classes": [], - "functions": [ - { - "name": "assert_matrix_orthogonal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pls_canonical_basics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sanity_check_pls_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sanity_check_pls_regression_constant_column_Y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sanity_check_pls_canonical", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sanity_check_pls_canonical_random", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_convergence_fail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_attibutes_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_univariate_equivalence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_copy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_generate_test_scale_and_stability_datasets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate dataset for test_scale_and_stability" - }, - { - "name": "test_scale_and_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "scale=True is equivalent to scale=False on centered/scaled data\nThis allows to check numerical stability over platforms as well" - }, - { - "name": "test_n_components_bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components_bounds_pls_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scores_deprecations", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_norm_y_weights_deprecation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_and_std_deprecation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_singular_value_helpers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_component_equivalence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svd_flip_1d", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cross_decomposition.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.datasets.setup", - "imports": [ - "import numpy", - "import os", - "import platform", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets._base", - "imports": [ - "import csv", - "import hashlib", - "import os", - "import shutil", - "from collections import namedtuple", - "from os import environ", - "from os import listdir", - "from os import makedirs", - "from os.path import dirname", - "from os.path import exists", - "from os.path import expanduser", - "from os.path import isdir", - "from os.path import join", - "from os.path import splitext", - "from utils import Bunch", - "from utils import check_random_state", - "from utils import check_pandas_support", - "from utils.validation import _deprecate_positional_args", - "import numpy as np", - "from urllib.request import urlretrieve", - "from externals._pilutil import imread" - ], - "classes": [], - "functions": [ - { - "name": "get_data_home", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The path to scikit-learn data directory. If `None`, the default path is `~/sklearn_learn_data`." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the path of the scikit-learn data dir.\n\nThis folder is used by some large dataset loaders to avoid downloading the\ndata several times.\n\nBy default the data dir is set to a folder named 'scikit_learn_data' in the\nuser home folder.\n\nAlternatively, it can be set by the 'SCIKIT_LEARN_DATA' environment\nvariable or programmatically by giving an explicit folder path. The '~'\nsymbol is expanded to the user home folder.\n\nIf the folder does not already exist, it is automatically created.\n\nParameters\n----------\ndata_home : str, default=None\n The path to scikit-learn data directory. If `None`, the default path\n is `~/sklearn_learn_data`." - }, - { - "name": "clear_data_home", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The path to scikit-learn data directory. If `None`, the default path is `~/sklearn_learn_data`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Delete all the content of the data home cache.\n\nParameters\n----------\ndata_home : str, default=None\n The path to scikit-learn data directory. If `None`, the default path\n is `~/sklearn_learn_data`." 
- }, - { - "name": "_convert_data_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "load_files", - "decorators": [], - "parameters": [ - { - "name": "container_path", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Path to the main folder holding one subfolder per category" - }, - { - "name": "description", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A paragraph describing the characteristic of the dataset: its source, reference, etc." - }, - { - "name": "categories", - "type": "List[str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If None (default), load all the categories. If not None, list of category names to load (other categories ignored)." - }, - { - "name": "load_content", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to load or not the content of the different files. If true a 'data' attribute containing the text information is present in the data structure returned. If not, a filenames attribute gives the path to the files." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to shuffle the data: might be important for models that make the assumption that the samples are independent and identically distributed (i.i.d.), such as stochastic gradient descent." - }, - { - "name": "encoding", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If None, do not try to decode the content of the files (e.g. for images or other non-text content). 
If not None, encoding to use to decode text files to Unicode if load_content is True." - }, - { - "name": "decode_error", - "type": "Literal['strict', 'ignore', 'replace']", - "hasDefault": true, - "default": "'strict'", - "limitation": null, - "ignored": false, - "docstring": "Instruction on what to do if a byte sequence is given to analyze that contains characters not of the given `encoding`. Passed as keyword argument 'errors' to bytes.decode." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Load text files with categories as subfolder names.\n\nIndividual samples are assumed to be files stored a two levels folder\nstructure such as the following:\n\n container_folder/\n category_1_folder/\n file_1.txt\n file_2.txt\n ...\n file_42.txt\n category_2_folder/\n file_43.txt\n file_44.txt\n ...\n\nThe folder names are used as supervised signal label names. The individual\nfile names are not important.\n\nThis function does not try to extract features into a numpy array or scipy\nsparse matrix. In addition, if load_content is false it does not try to\nload the files in memory.\n\nTo use text files in a scikit-learn classification or clustering algorithm,\nyou will need to use the :mod`~sklearn.feature_extraction.text` module to\nbuild a feature extraction transformer that suits your problem.\n\nIf you set load_content=True, you should also specify the encoding of the\ntext using the 'encoding' parameter. For many modern text files, 'utf-8'\nwill be the correct encoding. 
If you leave encoding equal to None, then the\ncontent will be made of bytes instead of Unicode, and you will not be able\nto use most functions in :mod:`~sklearn.feature_extraction.text`.\n\nSimilar feature extractors should be built for other kind of unstructured\ndata input such as images, audio, video, ...\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncontainer_path : str or unicode\n Path to the main folder holding one subfolder per category\n\ndescription : str or unicode, default=None\n A paragraph describing the characteristic of the dataset: its source,\n reference, etc.\n\ncategories : list of str, default=None\n If None (default), load all the categories. If not None, list of\n category names to load (other categories ignored).\n\nload_content : bool, default=True\n Whether to load or not the content of the different files. If true a\n 'data' attribute containing the text information is present in the data\n structure returned. If not, a filenames attribute gives the path to the\n files.\n\nshuffle : bool, default=True\n Whether or not to shuffle the data: might be important for models that\n make the assumption that the samples are independent and identically\n distributed (i.i.d.), such as stochastic gradient descent.\n\nencoding : str, default=None\n If None, do not try to decode the content of the files (e.g. for images\n or other non-text content). If not None, encoding to use to decode text\n files to Unicode if load_content is True.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. Passed as keyword\n argument 'errors' to bytes.decode.\n\nrandom_state : int, RandomState instance or None, default=0\n Determines random number generation for dataset shuffling. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : list of str\n Only present when `load_content=True`.\n The raw text data to learn.\n target : ndarray\n The target labels (integer index).\n target_names : list\n The names of target classes.\n DESCR : str\n The full description of the dataset.\n filenames: ndarray\n The filenames holding the dataset." - }, - { - "name": "load_data", - "decorators": [], - "parameters": [ - { - "name": "module_path", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The module path." - }, - { - "name": "data_file_name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of csv file to be loaded from module_path/data/data_file_name. For example 'wine_data.csv'." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Loads data from module_path/data/data_file_name.\n\nParameters\n----------\nmodule_path : string\n The module path.\n\ndata_file_name : string\n Name of csv file to be loaded from\n module_path/data/data_file_name. For example 'wine_data.csv'.\n\nReturns\n-------\ndata : Numpy array\n A 2D array with each row representing one sample and each column\n representing the features of a given sample.\n\ntarget : Numpy array\n A 1D array holding target variables for all the samples in `data.\n For example target[0] is the target varible for data[0].\n\ntarget_names : Numpy array\n A 1D array containing the names of the classifications. For example\n target_names[0] is the name of the target[0] class." 
- }, - { - "name": "load_wine", - "decorators": [], - "parameters": [ - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object." - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load and return the wine dataset (classification).\n\n.. versionadded:: 0.18\n\nThe wine dataset is a classic and very easy multi-class classification\ndataset.\n\n================= ==============\nClasses 3\nSamples per class [59,71,48]\nSamples total 178\nDimensionality 13\nFeatures real, positive\n================= ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (178, 13)\n The data matrix. 
If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (178,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n frame: DataFrame of shape (178, 14)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n\nThe copy of UCI ML Wine Data Set dataset is downloaded and modified to fit\nstandard format from:\nhttps://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\n\nExamples\n--------\nLet's say you are interested in the samples 10, 80, and 140, and want to\nknow their class name.\n\n>>> from sklearn.datasets import load_wine\n>>> data = load_wine()\n>>> data.target[[10, 80, 140]]\narray([0, 1, 2])\n>>> list(data.target_names)\n['class_0', 'class_1', 'class_2']" - }, - { - "name": "load_iris", - "decorators": [], - "parameters": [ - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.18" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. 
versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load and return the iris dataset (classification).\n\nThe iris dataset is a classic and very easy multi-class classification\ndataset.\n\n================= ==============\nClasses 3\nSamples per class 50\nSamples total 150\nDimensionality 4\nFeatures real, positive\n================= ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object. See\n below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (150, 4)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (150,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n frame: DataFrame of shape (150, 5)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n filename: str\n The path to the location of the data.\n\n .. versionadded:: 0.20\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n\nNotes\n-----\n .. 
versionchanged:: 0.20\n Fixed two wrong data points according to Fisher's paper.\n The new version is the same as in R, but not as in the UCI\n Machine Learning Repository.\n\nExamples\n--------\nLet's say you are interested in the samples 10, 25, and 50, and want to\nknow their class name.\n\n>>> from sklearn.datasets import load_iris\n>>> data = load_iris()\n>>> data.target[[10, 25, 50]]\narray([0, 0, 1])\n>>> list(data.target_names)\n['setosa', 'versicolor', 'virginica']" - }, - { - "name": "load_breast_cancer", - "decorators": [], - "parameters": [ - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.18" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load and return the breast cancer wisconsin dataset (classification).\n\nThe breast cancer dataset is a classic and very easy binary classification\ndataset.\n\n================= ==============\nClasses 2\nSamples per class 212(M),357(B)\nSamples total 569\nDimensionality 30\nFeatures real, positive\n================= ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. 
versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (569, 30)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (569,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n frame: DataFrame of shape (569, 31)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n filename: str\n The path to the location of the data.\n\n .. versionadded:: 0.20\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n\nThe copy of UCI ML Breast Cancer Wisconsin (Diagnostic) dataset is\ndownloaded from:\nhttps://goo.gl/U2Uwz2\n\nExamples\n--------\nLet's say you are interested in the samples 10, 50, and 85, and want to\nknow their class name.\n\n>>> from sklearn.datasets import load_breast_cancer\n>>> data = load_breast_cancer()\n>>> data.target[[10, 50, 85]]\narray([0, 1, 0])\n>>> list(data.target_names)\n['malignant', 'benign']" - }, - { - "name": "load_digits", - "decorators": [], - "parameters": [ - { - "name": "n_class", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The number of classes to return. Between 0 and 10." 
- }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.18" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load and return the digits dataset (classification).\n\nEach datapoint is a 8x8 image of a digit.\n\n================= ==============\nClasses 10\nSamples per class ~180\nSamples total 1797\nDimensionality 64\nFeatures integers 0-16\n================= ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_class : int, default=10\n The number of classes to return. Between 0 and 10.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. 
versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (1797, 64)\n The flattened data matrix. If `as_frame=True`, `data` will be\n a pandas DataFrame.\n target: {ndarray, Series} of shape (1797,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n\n .. versionadded:: 0.20\n\n frame: DataFrame of shape (1797, 65)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n images: {ndarray} of shape (1797, 8, 8)\n The raw image data.\n DESCR: str\n The full description of the dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n\nThis is a copy of the test set of the UCI ML hand-written digits datasets\nhttps://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits\n\nExamples\n--------\nTo load the data and visualize the images::\n\n >>> from sklearn.datasets import load_digits\n >>> digits = load_digits()\n >>> print(digits.data.shape)\n (1797, 64)\n >>> import matplotlib.pyplot as plt #doctest: +SKIP\n >>> plt.gray() #doctest: +SKIP\n >>> plt.matshow(digits.images[0]) #doctest: +SKIP\n >>> plt.show() #doctest: +SKIP" - }, - { - "name": "load_diabetes", - "decorators": [], - "parameters": [ - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. 
versionadded:: 0.18" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load and return the diabetes dataset (regression).\n\n============== ==================\nSamples total 442\nDimensionality 10\nFeatures real, -.2 < x < .2\nTargets integer 25 - 346\n============== ==================\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False.\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (442, 10)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (442,)\n The regression target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n frame: DataFrame of shape (442, 11)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. 
versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n data_filename: str\n The path to the location of the data.\n target_filename: str\n The path to the location of the target.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18" - }, - { - "name": "load_linnerud", - "decorators": [], - "parameters": [ - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.18" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric, string or categorical). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load and return the physical excercise linnerud dataset.\n\nThis dataset is suitable for multi-ouput regression tasks.\n\n============== ============================\nSamples total 20\nDimensionality 3 (for both data and target)\nFeatures integer\nTargets integer\n============== ============================\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string or categorical). 
The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (20, 3)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, dataframe} of shape (20, 3)\n The regression targets. If `as_frame=True`, `target` will be\n a pandas DataFrame.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of the target columns.\n frame: DataFrame of shape (20, 6)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n data_filename: str\n The path to the location of the data.\n target_filename: str\n The path to the location of the target.\n\n .. versionadded:: 0.20\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18" - }, - { - "name": "load_boston", - "decorators": [], - "parameters": [ - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load and return the boston house-prices dataset (regression).\n\n============== ==============\nSamples total 506\nDimensionality 13\nFeatures real, positive\nTargets real 5. 
- 50.\n============== ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (506, 13)\n The data matrix.\n target : ndarray of shape (506, )\n The regression target.\n filename : str\n The physical location of boston csv dataset.\n\n .. versionadded:: 0.20\n\n DESCR : str\n The full description of the dataset.\n feature_names : ndarray\n The names of features\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n\nNotes\n-----\n .. versionchanged:: 0.20\n Fixed a wrong data point at [445, 0].\n\nExamples\n--------\n>>> from sklearn.datasets import load_boston\n>>> X, y = load_boston(return_X_y=True)\n>>> print(X.shape)\n(506, 13)" - }, - { - "name": "load_sample_images", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Load sample images for image manipulation.\n\nLoads both, ``china`` and ``flower``.\n\nRead more in the :ref:`User Guide `.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n images : list of ndarray of shape (427, 640, 3)\n The two sample image.\n filenames : list\n The filenames for the images.\n DESCR : str\n The full description of the dataset.\n\nExamples\n--------\nTo load the data and visualize the images:\n\n>>> from sklearn.datasets import load_sample_images\n>>> dataset = load_sample_images() #doctest: +SKIP\n>>> len(dataset.images) #doctest: +SKIP\n2\n>>> first_img_data = dataset.images[0] #doctest: +SKIP\n>>> first_img_data.shape #doctest: +SKIP\n(427, 640, 3)\n>>> first_img_data.dtype #doctest: +SKIP\ndtype('uint8')" - }, - { - 
"name": "load_sample_image", - "decorators": [], - "parameters": [ - { - "name": "image_name", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The name of the sample image loaded" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Load the numpy array of a single sample image\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nimage_name : {`china.jpg`, `flower.jpg`}\n The name of the sample image loaded\n\nReturns\n-------\nimg : 3D array\n The image as a numpy array: height x width x color\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_sample_image\n>>> china = load_sample_image('china.jpg') # doctest: +SKIP\n>>> china.dtype # doctest: +SKIP\ndtype('uint8')\n>>> china.shape # doctest: +SKIP\n(427, 640, 3)\n>>> flower = load_sample_image('flower.jpg') # doctest: +SKIP\n>>> flower.dtype # doctest: +SKIP\ndtype('uint8')\n>>> flower.shape # doctest: +SKIP\n(427, 640, 3)" - }, - { - "name": "_pkl_filepath", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return filename for Python 3 pickles\n\nargs[-1] is expected to be the \".pkl\" filename. For compatibility with\nolder scikit-learn versions, a suffix is inserted before the extension.\n\n_pkl_filepath('/path/to/folder', 'filename.pkl') returns\n'/path/to/folder/filename_py3.pkl'" - }, - { - "name": "_sha256", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate the sha256 hash of the file at path." 
- }, - { - "name": "_fetch_remote", - "decorators": [], - "parameters": [ - { - "name": "remote", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Named tuple containing remote dataset meta information: url, filename and checksum" - }, - { - "name": "dirname", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Directory to save the file to." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Helper function to download a remote dataset into path\n\nFetch a dataset pointed by remote's url, save into path using remote's\nfilename and ensure its integrity based on the SHA256 Checksum of the\ndownloaded file.\n\nParameters\n----------\nremote : RemoteFileMetadata\n Named tuple containing remote dataset meta information: url, filename\n and checksum\n\ndirname : string\n Directory to save the file to.\n\nReturns\n-------\nfile_path: string\n Full path of the created file." - } - ] - }, - { - "name": "sklearn.datasets._california_housing", - "imports": [ - "from os.path import dirname", - "from os.path import exists", - "from os.path import join", - "from os import makedirs", - "from os import remove", - "import tarfile", - "import numpy as np", - "import logging", - "import joblib", - "from None import get_data_home", - "from _base import _convert_data_dataframe", - "from _base import _fetch_remote", - "from _base import _pkl_filepath", - "from _base import RemoteFileMetadata", - "from utils import Bunch", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "fetch_california_housing", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the datasets. 
By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data.data, data.target)`` instead of a Bunch object. .. versionadded:: 0.20" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric, string or categorical). The target is a pandas DataFrame or Series depending on the number of target_columns. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load the California housing dataset (regression).\n\n============== ==============\nSamples total 20640\nDimensionality 8\nFeatures real\nTarget real 0.15 - 5.\n============== ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\n\nreturn_X_y : bool, default=False.\n If True, returns ``(data.data, data.target)`` instead of a Bunch\n object.\n\n .. versionadded:: 0.20\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string or categorical). 
The target is\n a pandas DataFrame or Series depending on the number of target_columns.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray, shape (20640, 8)\n Each row corresponding to the 8 feature values in order.\n If ``as_frame`` is True, ``data`` is a pandas object.\n target : numpy array of shape (20640,)\n Each value corresponds to the average\n house value in units of 100,000.\n If ``as_frame`` is True, ``target`` is a pandas object.\n feature_names : list of length 8\n Array of ordered feature names used in the dataset.\n DESCR : string\n Description of the California housing dataset.\n frame : pandas DataFrame\n Only present when `as_frame=True`. DataFrame with ``data`` and\n ``target``.\n\n .. versionadded:: 0.23\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20\n\nNotes\n-----\n\nThis dataset consists of 20,640 samples and 9 features." - } - ] - }, - { - "name": "sklearn.datasets._covtype", - "imports": [ - "from gzip import GzipFile", - "import logging", - "from os.path import dirname", - "from os.path import exists", - "from os.path import join", - "from os import remove", - "from os import makedirs", - "import numpy as np", - "import joblib", - "from None import get_data_home", - "from _base import _convert_data_dataframe", - "from _base import _fetch_remote", - "from _base import RemoteFileMetadata", - "from utils import Bunch", - "from _base import _pkl_filepath", - "from utils import check_random_state", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "fetch_covtype", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the datasets. 
By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle dataset." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data.data, data.target)`` instead of a Bunch object. .. versionadded:: 0.20" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load the covertype dataset (classification).\n\nDownload it if necessary.\n\n================= ============\nClasses 7\nSamples total 581012\nDimensionality 54\nFeatures int\n================= ============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nshuffle : bool, default=False\n Whether to shuffle dataset.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data.data, data.target)`` instead of a Bunch\n object.\n\n .. versionadded:: 0.20\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is a pandas DataFrame or\n Series depending on the number of target columns. If `return_X_y` is\n True, then (`data`, `target`) will be pandas DataFrames or Series as\n described below.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (581012, 54)\n Each row corresponds to the 54 features in the dataset.\n target : ndarray of shape (581012,)\n Each value corresponds to one of\n the 7 forest covertypes with values\n ranging between 1 to 7.\n frame : dataframe of shape (581012, 53)\n Only present when `as_frame=True`. 
Contains `data` and `target`.\n DESCR : str\n Description of the forest covertype dataset.\n feature_names : list\n The names of the dataset columns.\n target_names: list\n The names of the target columns.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20" - } - ] - }, - { - "name": "sklearn.datasets._kddcup99", - "imports": [ - "import errno", - "from gzip import GzipFile", - "import logging", - "import os", - "from os.path import dirname", - "from os.path import exists", - "from os.path import join", - "import numpy as np", - "import joblib", - "from _base import _fetch_remote", - "from _base import _convert_data_dataframe", - "from None import get_data_home", - "from _base import RemoteFileMetadata", - "from utils import Bunch", - "from utils import check_random_state", - "from utils import shuffle as shuffle_method", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "fetch_kddcup99", - "decorators": [], - "parameters": [ - { - "name": "subset", - "type": "Literal['SA', 'SF', 'http', 'smtp']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "To return the corresponding classical subsets of kddcup 99. If None, return the entire kddcup 99 dataset." - }, - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. .. versionadded:: 0.19" - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle dataset." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling and for selection of abnormal samples if `subset='SA'`. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "percent10", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to load only 10 percent of the data." - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.20" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If `True`, returns a pandas Dataframe for the ``data`` and ``target`` objects in the `Bunch` returned object; `Bunch` return object will also have a ``frame`` member. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load the kddcup99 dataset (classification).\n\nDownload it if necessary.\n\n================= ====================================\nClasses 23\nSamples total 4898431\nDimensionality 41\nFeatures discrete (int) or continuous (float)\n================= ====================================\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18\n\nParameters\n----------\nsubset : {'SA', 'SF', 'http', 'smtp'}, default=None\n To return the corresponding classical subsets of kddcup 99.\n If None, return the entire kddcup 99 dataset.\n\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n .. versionadded:: 0.19\n\nshuffle : bool, default=False\n Whether to shuffle dataset.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling and for\n selection of abnormal samples if `subset='SA'`. Pass an int for\n reproducible output across multiple function calls.\n See :term:`Glossary `.\n\npercent10 : bool, default=True\n Whether to load only 10 percent of the data.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object. See\n below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.20\n\nas_frame : bool, default=False\n If `True`, returns a pandas Dataframe for the ``data`` and ``target``\n objects in the `Bunch` returned object; `Bunch` return object will also\n have a ``frame`` member.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (494021, 41)\n The data matrix to learn. If `as_frame=True`, `data` will be a\n pandas DataFrame.\n target : {ndarray, series} of shape (494021,)\n The regression target for each sample. If `as_frame=True`, `target`\n will be a pandas Series.\n frame : dataframe of shape (494021, 42)\n Only present when `as_frame=True`. 
Contains `data` and `target`.\n DESCR : str\n The full description of the dataset.\n feature_names : list\n The names of the dataset columns\n target_names: list\n The names of the target columns\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20" - }, - { - "name": "_fetch_brute_kddcup99", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "percent10", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to load only 10 percent of the data." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Load the kddcup99 dataset, downloading it if necessary.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. 
By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\npercent10 : bool, default=True\n Whether to load only 10 percent of the data.\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (494021, 41)\n Each row corresponds to the 41 features in the dataset.\n target : ndarray of shape (494021,)\n Each value corresponds to one of the 21 attack types or to the\n label 'normal.'.\n feature_names : list\n The names of the dataset columns\n target_names: list\n The names of the target columns\n DESCR : str\n Description of the kddcup99 dataset." - }, - { - "name": "_mkdirp", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ensure directory d exists (like mkdir -p on Unix)\nNo guarantee that the directory is writable." 
- } - ] - }, - { - "name": "sklearn.datasets._lfw", - "imports": [ - "from os import listdir", - "from os import makedirs", - "from os import remove", - "from os.path import dirname", - "from os.path import join", - "from os.path import exists", - "from os.path import isdir", - "import logging", - "import numpy as np", - "import joblib", - "from joblib import Memory", - "from _base import get_data_home", - "from _base import _fetch_remote", - "from _base import RemoteFileMetadata", - "from utils import Bunch", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import parse_version", - "import tarfile", - "from externals._pilutil import imread", - "from externals._pilutil import imresize" - ], - "classes": [], - "functions": [ - { - "name": "_check_fetch_lfw", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper function to download any missing LFW data" - }, - { - "name": "_load_imgs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Internally used to load images" - }, - { - "name": "_fetch_lfw_people", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform the actual data loading for the lfw people dataset\n\nThis operation is meant to be cached by a joblib wrapper." - }, - { - "name": "fetch_lfw_people", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "funneled", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Download and use the funneled variant of the dataset." 
- }, - { - "name": "resize", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Ratio used to resize the each face picture." - }, - { - "name": "min_faces_per_person", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The extracted dataset will only retain pictures of people that have at least `min_faces_per_person` different pictures." - }, - { - "name": "color", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Keep the 3 RGB channels instead of averaging them to a single gray level channel. If color is True the shape of the data has one more dimension than the shape with color = False." - }, - { - "name": "slice_", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Provide a custom 2D slice (height, width) to extract the 'interesting' part of the jpeg files and avoid use statistical correlation from the background" - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch object. See below for more information about the `dataset.data` and `dataset.target` object. .. 
versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load the Labeled Faces in the Wild (LFW) people dataset (classification).\n\nDownload it if necessary.\n\n================= =======================\nClasses 5749\nSamples total 13233\nDimensionality 5828\nFeatures real, between 0 and 255\n================= =======================\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\nfunneled : bool, default=True\n Download and use the funneled variant of the dataset.\n\nresize : float, default=0.5\n Ratio used to resize the each face picture.\n\nmin_faces_per_person : int, default=None\n The extracted dataset will only retain pictures of people that have at\n least `min_faces_per_person` different pictures.\n\ncolor : bool, default=False\n Keep the 3 RGB channels instead of averaging them to a single\n gray level channel. If color is True the shape of the data has\n one more dimension than the shape with color = False.\n\nslice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))\n Provide a custom 2D slice (height, width) to extract the\n 'interesting' part of the jpeg files and avoid use statistical\n correlation from the background\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch\n object. See below for more information about the `dataset.data` and\n `dataset.target` object.\n\n .. 
versionadded:: 0.20\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : numpy array of shape (13233, 2914)\n Each row corresponds to a ravelled face image\n of original size 62 x 47 pixels.\n Changing the ``slice_`` or resize parameters will change the\n shape of the output.\n images : numpy array of shape (13233, 62, 47)\n Each row is a face image corresponding to one of the 5749 people in\n the dataset. Changing the ``slice_``\n or resize parameters will change the shape of the output.\n target : numpy array of shape (13233,)\n Labels associated to each face image.\n Those labels range from 0-5748 and correspond to the person IDs.\n DESCR : string\n Description of the Labeled Faces in the Wild (LFW) dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20" - }, - { - "name": "_fetch_lfw_pairs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform the actual data loading for the LFW pairs dataset\n\nThis operation is meant to be cached by a joblib wrapper." - }, - { - "name": "fetch_lfw_pairs", - "decorators": [], - "parameters": [ - { - "name": "subset", - "type": "Literal['train', 'test', '10_folds']", - "hasDefault": true, - "default": "'train'", - "limitation": null, - "ignored": false, - "docstring": "Select the dataset to load: 'train' for the development training set, 'test' for the development test set, and '10_folds' for the official evaluation set that is meant to be used with a 10-folds cross validation." - }, - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." 
- }, - { - "name": "funneled", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Download and use the funneled variant of the dataset." - }, - { - "name": "resize", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Ratio used to resize the each face picture." - }, - { - "name": "color", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Keep the 3 RGB channels instead of averaging them to a single gray level channel. If color is True the shape of the data has one more dimension than the shape with color = False." - }, - { - "name": "slice_", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Provide a custom 2D slice (height, width) to extract the 'interesting' part of the jpeg files and avoid use statistical correlation from the background" - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Load the Labeled Faces in the Wild (LFW) pairs dataset (classification).\n\nDownload it if necessary.\n\n================= =======================\nClasses 2\nSamples total 13233\nDimensionality 5828\nFeatures real, between 0 and 255\n================= =======================\n\nIn the official `README.txt`_ this task is described as the\n\"Restricted\" task. As I am not sure as to implement the\n\"Unrestricted\" variant correctly, I left it as unsupported for now.\n\n .. 
_`README.txt`: http://vis-www.cs.umass.edu/lfw/README.txt\n\nThe original images are 250 x 250 pixels, but the default slice and resize\narguments reduce them to 62 x 47.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nsubset : {'train', 'test', '10_folds'}, default='train'\n Select the dataset to load: 'train' for the development training\n set, 'test' for the development test set, and '10_folds' for the\n official evaluation set that is meant to be used with a 10-folds\n cross validation.\n\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By\n default all scikit-learn data is stored in '~/scikit_learn_data'\n subfolders.\n\nfunneled : bool, default=True\n Download and use the funneled variant of the dataset.\n\nresize : float, default=0.5\n Ratio used to resize the each face picture.\n\ncolor : bool, default=False\n Keep the 3 RGB channels instead of averaging them to a single\n gray level channel. If color is True the shape of the data has\n one more dimension than the shape with color = False.\n\nslice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))\n Provide a custom 2D slice (height, width) to extract the\n 'interesting' part of the jpeg files and avoid use statistical\n correlation from the background\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (2200, 5828). Shape depends on ``subset``.\n Each row corresponds to 2 ravel'd face images\n of original size 62 x 47 pixels.\n Changing the ``slice_``, ``resize`` or ``subset`` parameters\n will change the shape of the output.\n pairs : ndarray of shape (2200, 2, 62, 47). 
Shape depends on ``subset``\n Each row has 2 face images corresponding\n to same or different person from the dataset\n containing 5749 people. Changing the ``slice_``,\n ``resize`` or ``subset`` parameters will change the shape of the\n output.\n target : numpy array of shape (2200,). Shape depends on ``subset``.\n Labels associated to each pair of images.\n The two label values being different persons or the same person.\n DESCR : string\n Description of the Labeled Faces in the Wild (LFW) dataset." - } - ] - }, - { - "name": "sklearn.datasets._olivetti_faces", - "imports": [ - "from os.path import dirname", - "from os.path import exists", - "from os.path import join", - "from os import makedirs", - "from os import remove", - "import numpy as np", - "from scipy.io.matlab import loadmat", - "import joblib", - "from None import get_data_home", - "from _base import _fetch_remote", - "from _base import RemoteFileMetadata", - "from _base import _pkl_filepath", - "from utils import check_random_state", - "from utils import Bunch", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "fetch_olivetti_faces", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True the order of the dataset is shuffled to avoid having images of the same person grouped." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling. 
Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns `(data, target)` instead of a `Bunch` object. See below for more information about the `data` and `target` object. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load the Olivetti faces data-set from AT&T (classification).\n\nDownload it if necessary.\n\n================= =====================\nClasses 40\nSamples total 400\nDimensionality 4096\nFeatures real, between 0 and 1\n================= =====================\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\nshuffle : bool, default=False\n If True the order of the dataset is shuffled to avoid having\n images of the same person grouped.\n\nrandom_state : int, RandomState instance or None, default=0\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns `(data, target)` instead of a `Bunch` object. See\n below for more information about the `data` and `target` object.\n\n .. 
versionadded:: 0.22\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data: ndarray, shape (400, 4096)\n Each row corresponds to a ravelled\n face image of original size 64 x 64 pixels.\n images : ndarray, shape (400, 64, 64)\n Each row is a face image\n corresponding to one of the 40 subjects of the dataset.\n target : ndarray, shape (400,)\n Labels associated to each face image.\n Those labels are ranging from 0-39 and correspond to the\n Subject IDs.\n DESCR : str\n Description of the modified Olivetti Faces Dataset.\n\n(data, target) : tuple if `return_X_y=True`\n .. versionadded:: 0.22" - } - ] - }, - { - "name": "sklearn.datasets._openml", - "imports": [ - "import gzip", - "import json", - "import os", - "import shutil", - "import hashlib", - "from os.path import join", - "from warnings import warn", - "from contextlib import closing", - "from functools import wraps", - "from typing import Callable", - "from typing import Optional", - "from typing import Dict", - "from typing import Tuple", - "from typing import List", - "from typing import Any", - "from typing import Union", - "import itertools", - "from collections.abc import Generator", - "from collections import OrderedDict", - "from functools import partial", - "from urllib.request import urlopen", - "from urllib.request import Request", - "import numpy as np", - "import scipy.sparse", - "from externals import _arff", - "from externals._arff import ArffSparseDataType", - "from externals._arff import ArffContainerType", - "from None import get_data_home", - "from urllib.error import HTTPError", - "from utils import Bunch", - "from utils import get_chunk_n_rows", - "from utils import _chunk_generator", - "from utils import check_pandas_support", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "OpenMLError", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "HTTP 412 is a specific OpenML error code, indicating a generic error" - } - ], - "functions": [ - { - "name": "_get_local_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_retry_with_clean_cache", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "If the first call to the decorated function fails, the local cached\nfile is removed, and the function is called again. If ``data_home`` is\n``None``, then the function is called once." - }, - { - "name": "_open_openml_url", - "decorators": [], - "parameters": [ - { - "name": "openml_path", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "OpenML URL that will be accessed. This will be prefixes with _OPENML_PREFIX" - }, - { - "name": "data_home", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Directory to which the files will be cached. If None, no caching will be applied." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Returns a resource from OpenML.org. Caches it to data_home if required.\n\nParameters\n----------\nopenml_path : str\n OpenML URL that will be accessed. This will be prefixes with\n _OPENML_PREFIX\n\ndata_home : str\n Directory to which the files will be cached. If None, no caching will\n be applied.\n\nReturns\n-------\nresult : stream\n A stream to the OpenML resource" - }, - { - "name": "_get_json_content_from_openml_api", - "decorators": [], - "parameters": [ - { - "name": "url", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The URL to load from. 
Should be an official OpenML endpoint" - }, - { - "name": "error_message", - "type": "Optional[str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The error message to raise if an acceptable OpenML error is thrown (acceptable error is, e.g., data id not found. Other errors, like 404's will throw the native error message)" - }, - { - "name": "data_home", - "type": "Optional[str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Location to cache the response. None if no cache is required." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Loads json data from the openml api\n\nParameters\n----------\nurl : str\n The URL to load from. Should be an official OpenML endpoint\n\nerror_message : str or None\n The error message to raise if an acceptable OpenML error is thrown\n (acceptable error is, e.g., data id not found. Other errors, like 404's\n will throw the native error message)\n\ndata_home : str or None\n Location to cache the response. None if no cache is required.\n\nReturns\n-------\njson_data : json\n the json result from the OpenML server if the call was successful.\n An exception otherwise." - }, - { - "name": "_split_sparse_columns", - "decorators": [], - "parameters": [ - { - "name": "arff_data", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A tuple of three lists of equal size; first list indicating the value, second the x coordinate and the third the y coordinate." - }, - { - "name": "include_columns", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A list of columns to include." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "obtains several columns from sparse arff representation. 
Additionally, the\ncolumn indices are re-labelled, given the columns that are not included.\n(e.g., when including [1, 2, 3], the columns will be relabelled to\n[0, 1, 2])\n\nParameters\n----------\narff_data : tuple\n A tuple of three lists of equal size; first list indicating the value,\n second the x coordinate and the third the y coordinate.\n\ninclude_columns : list\n A list of columns to include.\n\nReturns\n-------\narff_data_new : tuple\n Subset of arff data with only the include columns indicated by the\n include_columns argument." - }, - { - "name": "_sparse_data_to_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_convert_arff_data", - "decorators": [], - "parameters": [ - { - "name": "arff", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "As obtained from liac-arff object." - }, - { - "name": "col_slice_x", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The column indices that are sliced from the original array to return as X data" - }, - { - "name": "col_slice_y", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The column indices that are sliced from the original array to return as y data" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "converts the arff object into the appropriate matrix type (np.array or\nscipy.sparse.csr_matrix) based on the 'data part' (i.e., in the\nliac-arff dict, the object from the 'data' key)\n\nParameters\n----------\narff : dict\n As obtained from liac-arff object.\n\ncol_slice_x : list\n The column indices that are sliced from the original array to return\n as X data\n\ncol_slice_y : list\n The column indices that are sliced from the original array to return\n as y data\n\nReturns\n-------\nX : 
np.array or scipy.sparse.csr_matrix\ny : np.array" - }, - { - "name": "_feature_to_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Map feature to dtype for pandas DataFrame\n " - }, - { - "name": "_convert_arff_data_dataframe", - "decorators": [], - "parameters": [ - { - "name": "arff", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "As obtained from liac-arff object." - }, - { - "name": "columns", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Columns from dataframe to return." - }, - { - "name": "features_dict", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maps feature name to feature info from openml." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convert the ARFF object into a pandas DataFrame.\n\nParameters\n----------\narff : dict\n As obtained from liac-arff object.\n\ncolumns : list\n Columns from dataframe to return.\n\nfeatures_dict : dict\n Maps feature name to feature info from openml.\n\nReturns\n-------\nresult : tuple\n tuple with the resulting dataframe" - }, - { - "name": "_get_data_info_by_name", - "decorators": [], - "parameters": [ - { - "name": "name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "name of the dataset" - }, - { - "name": "version", - "type": "Union[str, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If version is an integer, the exact name/version will be obtained from OpenML. If version is a string (value: \"active\") it will take the first version from OpenML that is annotated as active. Any other string values except \"active\" are treated as integer." 
- }, - { - "name": "data_home", - "type": "Optional[str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Location to cache the response. None if no cache is required." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Utilizes the openml dataset listing api to find a dataset by\nname/version\nOpenML api function:\nhttps://www.openml.org/api_docs#!/data/get_data_list_data_name_data_name\n\nParameters\n----------\nname : str\n name of the dataset\n\nversion : int or str\n If version is an integer, the exact name/version will be obtained from\n OpenML. If version is a string (value: \"active\") it will take the first\n version from OpenML that is annotated as active. Any other string\n values except \"active\" are treated as integer.\n\ndata_home : str or None\n Location to cache the response. None if no cache is required.\n\nReturns\n-------\nfirst_dataset : json\n json representation of the first dataset object that adhired to the\n search criteria" - }, - { - "name": "_get_data_description_by_id", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_data_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_data_qualities", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_num_samples", - "decorators": [], - "parameters": [ - { - "name": "data_qualities", - "type": "List[Dict]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to retrieve the number of instances (samples) in the dataset." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get the number of samples from data qualities.\n\nParameters\n----------\ndata_qualities : list of dict\n Used to retrieve the number of instances (samples) in the dataset.\n\nReturns\n-------\nn_samples : int\n The number of samples in the dataset or -1 if data qualities are\n unavailable." - }, - { - "name": "_load_arff_response", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load arff data with url and parses arff response with parse_arff" - }, - { - "name": "_download_data_to_bunch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Download OpenML ARFF and convert to Bunch of data\n " - }, - { - "name": "_verify_target_data_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_valid_data_column_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fetch_openml", - "decorators": [], - "parameters": [ - { - "name": "name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "String identifier of the dataset. Note that OpenML can have multiple datasets with the same name." - }, - { - "name": "version", - "type": "Union[Literal['active'], int]", - "hasDefault": true, - "default": "'active'", - "limitation": null, - "ignored": false, - "docstring": "Version of the dataset. Can only be provided if also ``name`` is given. If 'active' the oldest version that's still active is used. Since there may be more than one active version of a dataset, and those versions may fundamentally be different from one another, setting an exact version is highly recommended." 
- }, - { - "name": "data_id", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "OpenML ID of the dataset. The most specific way of retrieving a dataset. If data_id is not given, name (and potential version) are used to obtain a dataset." - }, - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the data sets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "target_column", - "type": "Optional[Union[List, str]]", - "hasDefault": true, - "default": "'default-target'", - "limitation": null, - "ignored": false, - "docstring": "Specify the column name in the data to use as target. If 'default-target', the standard target column a stored on the server is used. If ``None``, all columns are returned as data and the target is ``None``. If list (of strings), all columns with these names are returned as multi-target (Note: not all scikit-learn classifiers can handle all types of multi-output combinations)" - }, - { - "name": "cache", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to cache downloaded datasets using joblib." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` objects." - }, - { - "name": "as_frame", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric, string or categorical). 
The target is a pandas DataFrame or Series depending on the number of target_columns. The Bunch will contain a ``frame`` attribute with the target and the data. If ``return_X_y`` is True, then ``(data, target)`` will be pandas DataFrames or Series as describe above. If as_frame is 'auto', the data and target will be converted to DataFrame or Series as if as_frame is set to True, unless the dataset is stored in sparse format. .. versionchanged:: 0.24 The default value of `as_frame` changed from `False` to `'auto'` in 0.24." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fetch dataset from openml by name or dataset id.\n\nDatasets are uniquely identified by either an integer ID or by a\ncombination of name and version (i.e. there might be multiple\nversions of the 'iris' dataset). Please give either name or data_id\n(not both). In case a name is given, a version can also be\nprovided.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\n.. note:: EXPERIMENTAL\n\n The API is experimental (particularly the return value structure),\n and might have small backward-incompatible changes without notice\n or warning in future releases.\n\nParameters\n----------\nname : str, default=None\n String identifier of the dataset. Note that OpenML can have multiple\n datasets with the same name.\n\nversion : int or 'active', default='active'\n Version of the dataset. Can only be provided if also ``name`` is given.\n If 'active' the oldest version that's still active is used. Since\n there may be more than one active version of a dataset, and those\n versions may fundamentally be different from one another, setting an\n exact version is highly recommended.\n\ndata_id : int, default=None\n OpenML ID of the dataset. The most specific way of retrieving a\n dataset. If data_id is not given, name (and potential version) are\n used to obtain a dataset.\n\ndata_home : str, default=None\n Specify another download and cache folder for the data sets. 
By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ntarget_column : str, list or None, default='default-target'\n Specify the column name in the data to use as target. If\n 'default-target', the standard target column a stored on the server\n is used. If ``None``, all columns are returned as data and the\n target is ``None``. If list (of strings), all columns with these names\n are returned as multi-target (Note: not all scikit-learn classifiers\n can handle all types of multi-output combinations)\n\ncache : bool, default=True\n Whether to cache downloaded datasets using joblib.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object. See\n below for more information about the `data` and `target` objects.\n\nas_frame : bool or 'auto', default='auto'\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string or categorical). The target is\n a pandas DataFrame or Series depending on the number of target_columns.\n The Bunch will contain a ``frame`` attribute with the target and the\n data. If ``return_X_y`` is True, then ``(data, target)`` will be pandas\n DataFrames or Series as describe above.\n\n If as_frame is 'auto', the data and target will be converted to\n DataFrame or Series as if as_frame is set to True, unless the dataset\n is stored in sparse format.\n\n .. versionchanged:: 0.24\n The default value of `as_frame` changed from `False` to `'auto'`\n in 0.24.\n\nReturns\n-------\n\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : np.array, scipy.sparse.csr_matrix of floats, or pandas DataFrame\n The feature matrix. Categorical features are encoded as ordinals.\n target : np.array, pandas Series or DataFrame\n The regression target or classification labels, if applicable.\n Dtype is float if numeric, and object if categorical. 
If\n ``as_frame`` is True, ``target`` is a pandas object.\n DESCR : str\n The full description of the dataset\n feature_names : list\n The names of the dataset columns\n target_names: list\n The names of the target columns\n\n .. versionadded:: 0.22\n\n categories : dict or None\n Maps each categorical feature name to a list of values, such\n that the value encoded as i is ith in the list. If ``as_frame``\n is True, this is None.\n details : dict\n More metadata from OpenML\n frame : pandas DataFrame\n Only present when `as_frame=True`. DataFrame with ``data`` and\n ``target``.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. note:: EXPERIMENTAL\n\n This interface is **experimental** and subsequent releases may\n change attributes without notice (although there should only be\n minor changes to ``data`` and ``target``).\n\n Missing values in the 'data' are represented as NaN's. Missing values\n in 'target' are represented as NaN's (numerical target) or None\n (categorical target)" - } - ] - }, - { - "name": "sklearn.datasets._rcv1", - "imports": [ - "import logging", - "from os import remove", - "from os import makedirs", - "from os.path import dirname", - "from os.path import exists", - "from os.path import join", - "from gzip import GzipFile", - "import numpy as np", - "import scipy.sparse as sp", - "import joblib", - "from None import get_data_home", - "from _base import _pkl_filepath", - "from _base import _fetch_remote", - "from _base import RemoteFileMetadata", - "from _svmlight_format_io import load_svmlight_files", - "from utils import shuffle as shuffle_", - "from utils import Bunch", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "fetch_rcv1", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the 
datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "subset", - "type": "Literal['train', 'test', 'all']", - "hasDefault": true, - "default": "'all'", - "limitation": null, - "ignored": false, - "docstring": "Select the dataset to load: 'train' for the training set (23149 samples), 'test' for the test set (781265 samples), 'all' for both, with the training samples first if shuffle is False. This follows the official LYRL2004 chronological split." - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle dataset." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch object. See below for more information about the `dataset.data` and `dataset.target` object. .. 
versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load the RCV1 multilabel dataset (classification).\n\nDownload it if necessary.\n\nVersion: RCV1-v2, vectors, full sets, topics multilabels.\n\n================= =====================\nClasses 103\nSamples total 804414\nDimensionality 47236\nFeatures real, between 0 and 1\n================= =====================\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\nsubset : {'train', 'test', 'all'}, default='all'\n Select the dataset to load: 'train' for the training set\n (23149 samples), 'test' for the test set (781265 samples),\n 'all' for both, with the training samples first if shuffle is False.\n This follows the official LYRL2004 chronological split.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nshuffle : bool, default=False\n Whether to shuffle dataset.\n\nreturn_X_y : bool, default=False\n If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch\n object. See below for more information about the `dataset.data` and\n `dataset.target` object.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : sparse matrix of shape (804414, 47236), dtype=np.float64\n The array has 0.16% of non zero values. 
Will be of CSR format.\n target : sparse matrix of shape (804414, 103), dtype=np.uint8\n Each sample has a value of 1 in its categories, and 0 in others.\n The array has 3.15% of non zero values. Will be of CSR format.\n sample_id : ndarray of shape (804414,), dtype=np.uint32,\n Identification number of each sample, as ordered in dataset.data.\n target_names : ndarray of shape (103,), dtype=object\n Names of each target (RCV1 topics), as ordered in dataset.target.\n DESCR : str\n Description of the RCV1 dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20" - }, - { - "name": "_inverse_permutation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inverse permutation p." - }, - { - "name": "_find_permutation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the permutation from a to b." - } - ] - }, - { - "name": "sklearn.datasets._samples_generator", - "imports": [ - "import numbers", - "import array", - "from collections.abc import Iterable", - "import numpy as np", - "from scipy import linalg", - "import scipy.sparse as sp", - "from preprocessing import MultiLabelBinarizer", - "from utils import check_array", - "from utils import check_random_state", - "from utils import shuffle as util_shuffle", - "from utils.random import sample_without_replacement", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "_generate_hypercube", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns distinct binary samples of length dimensions.\n " - }, - { - "name": "make_classification", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." 
- }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "20", - "limitation": null, - "ignored": false, - "docstring": "The total number of features. These comprise ``n_informative`` informative features, ``n_redundant`` redundant features, ``n_repeated`` duplicated features and ``n_features-n_informative-n_redundant-n_repeated`` useless features drawn at random." - }, - { - "name": "n_informative", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of informative features. Each class is composed of a number of gaussian clusters each located around the vertices of a hypercube in a subspace of dimension ``n_informative``. For each cluster, informative features are drawn independently from N(0, 1) and then randomly linearly combined within each cluster in order to add covariance. The clusters are then placed on the vertices of the hypercube." - }, - { - "name": "n_redundant", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of redundant features. These features are generated as random linear combinations of the informative features." - }, - { - "name": "n_repeated", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The number of duplicated features, drawn randomly from the informative and the redundant features." - }, - { - "name": "n_classes", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of classes (or labels) of the classification problem." - }, - { - "name": "n_clusters_per_class", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of clusters per class." 
- }, - { - "name": "weights", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The proportions of samples assigned to each class. If None, then classes are balanced. Note that if ``len(weights) == n_classes - 1``, then the last class weight is automatically inferred. More than ``n_samples`` samples may be returned if the sum of ``weights`` exceeds 1. Note that the actual class proportions will not exactly match ``weights`` when ``flip_y`` isn't 0." - }, - { - "name": "flip_y", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The fraction of samples whose class is assigned randomly. Larger values introduce noise in the labels and make the classification task harder. Note that the default setting flip_y > 0 might lead to less than ``n_classes`` in y in some cases." - }, - { - "name": "class_sep", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The factor multiplying the hypercube size. Larger values spread out the clusters/classes and make the classification task easier." - }, - { - "name": "hypercube", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, the clusters are put on the vertices of a hypercube. If False, the clusters are put on the vertices of a random polytope." - }, - { - "name": "shift", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Shift features by the specified value. If None, then features are shifted by a random value drawn in [-class_sep, class_sep]." - }, - { - "name": "scale", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Multiply features by the specified value. 
If None, then features are scaled by a random value drawn in [1, 100]. Note that scaling happens after shifting." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Shuffle the samples and the features." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a random n-class classification problem.\n\nThis initially creates clusters of points normally distributed (std=1)\nabout vertices of an ``n_informative``-dimensional hypercube with sides of\nlength ``2*class_sep`` and assigns an equal number of clusters to each\nclass. It introduces interdependence between these features and adds\nvarious types of further noise to the data.\n\nWithout shuffling, ``X`` horizontally stacks features in the following\norder: the primary ``n_informative`` features, followed by ``n_redundant``\nlinear combinations of the informative features, followed by ``n_repeated``\nduplicates, drawn randomly with replacement from the informative and\nredundant features. The remaining features are filled with random noise.\nThus, without shuffling, all useful features are contained in the columns\n``X[:, :n_informative + n_redundant + n_repeated]``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=20\n The total number of features. 
These comprise ``n_informative``\n informative features, ``n_redundant`` redundant features,\n ``n_repeated`` duplicated features and\n ``n_features-n_informative-n_redundant-n_repeated`` useless features\n drawn at random.\n\nn_informative : int, default=2\n The number of informative features. Each class is composed of a number\n of gaussian clusters each located around the vertices of a hypercube\n in a subspace of dimension ``n_informative``. For each cluster,\n informative features are drawn independently from N(0, 1) and then\n randomly linearly combined within each cluster in order to add\n covariance. The clusters are then placed on the vertices of the\n hypercube.\n\nn_redundant : int, default=2\n The number of redundant features. These features are generated as\n random linear combinations of the informative features.\n\nn_repeated : int, default=0\n The number of duplicated features, drawn randomly from the informative\n and the redundant features.\n\nn_classes : int, default=2\n The number of classes (or labels) of the classification problem.\n\nn_clusters_per_class : int, default=2\n The number of clusters per class.\n\nweights : array-like of shape (n_classes,) or (n_classes - 1,), default=None\n The proportions of samples assigned to each class. If None, then\n classes are balanced. Note that if ``len(weights) == n_classes - 1``,\n then the last class weight is automatically inferred.\n More than ``n_samples`` samples may be returned if the sum of\n ``weights`` exceeds 1. Note that the actual class proportions will\n not exactly match ``weights`` when ``flip_y`` isn't 0.\n\nflip_y : float, default=0.01\n The fraction of samples whose class is assigned randomly. Larger\n values introduce noise in the labels and make the classification\n task harder. Note that the default setting flip_y > 0 might lead\n to less than ``n_classes`` in y in some cases.\n\nclass_sep : float, default=1.0\n The factor multiplying the hypercube size. 
Larger values spread\n out the clusters/classes and make the classification task easier.\n\nhypercube : bool, default=True\n If True, the clusters are put on the vertices of a hypercube. If\n False, the clusters are put on the vertices of a random polytope.\n\nshift : float, ndarray of shape (n_features,) or None, default=0.0\n Shift features by the specified value. If None, then features\n are shifted by a random value drawn in [-class_sep, class_sep].\n\nscale : float, ndarray of shape (n_features,) or None, default=1.0\n Multiply features by the specified value. If None, then features\n are scaled by a random value drawn in [1, 100]. Note that scaling\n happens after shifting.\n\nshuffle : bool, default=True\n Shuffle the samples and the features.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels for class membership of each sample.\n\nNotes\n-----\nThe algorithm is adapted from Guyon [1] and was designed to generate\nthe \"Madelon\" dataset.\n\nReferences\n----------\n.. [1] I. Guyon, \"Design of experiments for the NIPS 2003 variable\n selection benchmark\", 2003.\n\nSee Also\n--------\nmake_blobs : Simplified variant.\nmake_multilabel_classification : Unrelated generator for multilabel tasks." - }, - { - "name": "make_multilabel_classification", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "20", - "limitation": null, - "ignored": false, - "docstring": "The total number of features." 
- }, - { - "name": "n_classes", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "The number of classes of the classification problem." - }, - { - "name": "n_labels", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The average number of labels per instance. More precisely, the number of labels per sample is drawn from a Poisson distribution with ``n_labels`` as its expected value, but samples are bounded (using rejection sampling) by ``n_classes``, and must be nonzero if ``allow_unlabeled`` is False." - }, - { - "name": "length", - "type": "int", - "hasDefault": true, - "default": "50", - "limitation": null, - "ignored": false, - "docstring": "The sum of the features (number of words if documents) is drawn from a Poisson distribution with this expected value." - }, - { - "name": "allow_unlabeled", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, some instances might not belong to any class." - }, - { - "name": "sparse", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, return a sparse feature matrix .. versionadded:: 0.17 parameter to allow *sparse* output." - }, - { - "name": "return_indicator", - "type": "Literal['dense', 'sparse']", - "hasDefault": true, - "default": "'dense'", - "limitation": null, - "ignored": false, - "docstring": "If ``'dense'`` return ``Y`` in the dense binary indicator format. If ``'sparse'`` return ``Y`` in the sparse binary indicator format. ``False`` returns a list of lists of labels." 
- }, - { - "name": "return_distributions", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, return the prior class probability and conditional probabilities of features given classes, from which the data was drawn." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a random multilabel classification problem.\n\nFor each sample, the generative process is:\n - pick the number of labels: n ~ Poisson(n_labels)\n - n times, choose a class c: c ~ Multinomial(theta)\n - pick the document length: k ~ Poisson(length)\n - k times, choose a word: w ~ Multinomial(theta_c)\n\nIn the above process, rejection sampling is used to make sure that\nn is never zero or more than `n_classes`, and that the document length\nis never zero. Likewise, we reject classes which have already been chosen.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=20\n The total number of features.\n\nn_classes : int, default=5\n The number of classes of the classification problem.\n\nn_labels : int, default=2\n The average number of labels per instance. 
More precisely, the number\n of labels per sample is drawn from a Poisson distribution with\n ``n_labels`` as its expected value, but samples are bounded (using\n rejection sampling) by ``n_classes``, and must be nonzero if\n ``allow_unlabeled`` is False.\n\nlength : int, default=50\n The sum of the features (number of words if documents) is drawn from\n a Poisson distribution with this expected value.\n\nallow_unlabeled : bool, default=True\n If ``True``, some instances might not belong to any class.\n\nsparse : bool, default=False\n If ``True``, return a sparse feature matrix\n\n .. versionadded:: 0.17\n parameter to allow *sparse* output.\n\nreturn_indicator : {'dense', 'sparse'} or False, default='dense'\n If ``'dense'`` return ``Y`` in the dense binary indicator format. If\n ``'sparse'`` return ``Y`` in the sparse binary indicator format.\n ``False`` returns a list of lists of labels.\n\nreturn_distributions : bool, default=False\n If ``True``, return the prior class probability and conditional\n probabilities of features given classes, from which the data was\n drawn.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The generated samples.\n\nY : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n The label sets. Sparse matrix should be of CSR format.\n\np_c : ndarray of shape (n_classes,)\n The probability of each class being drawn. Only returned if\n ``return_distributions=True``.\n\np_w_c : ndarray of shape (n_features, n_classes)\n The probability of each feature being drawn given each class.\n Only returned if ``return_distributions=True``." 
- }, - { - "name": "make_hastie_10_2", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "12000", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generates data for binary classification used in\nHastie et al. 2009, Example 10.2.\n\nThe ten features are standard independent Gaussian and\nthe target ``y`` is defined by::\n\n y[i] = 1 if np.sum(X[i] ** 2) > 9.34 else -1\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=12000\n The number of samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 10)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] T. Hastie, R. Tibshirani and J. Friedman, \"Elements of Statistical\n Learning Ed. 2\", Springer, 2009.\n\nSee Also\n--------\nmake_gaussian_quantiles : A generalization of this dataset approach." - }, - { - "name": "make_regression", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of features." 
- }, - { - "name": "n_informative", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The number of informative features, i.e., the number of features used to build the linear model used to generate the output." - }, - { - "name": "n_targets", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of regression targets, i.e., the dimension of the y output vector associated with a sample. By default, the output is a scalar." - }, - { - "name": "bias", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The bias term in the underlying linear model." - }, - { - "name": "effective_rank", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "if not None: The approximate number of singular vectors required to explain most of the input data by linear combinations. Using this kind of singular spectrum in the input allows the generator to reproduce the correlations often observed in practice. if None: The input set is well conditioned, centered and gaussian with unit variance." - }, - { - "name": "tail_strength", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The relative importance of the fat noisy tail of the singular values profile if `effective_rank` is not None. When a float, it should be between 0 and 1." - }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the gaussian noise applied to the output." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Shuffle the samples and the features." 
- }, - { - "name": "coef", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the coefficients of the underlying linear model are returned." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a random regression problem.\n\nThe input set can either be well conditioned (by default) or have a low\nrank-fat tail singular profile. See :func:`make_low_rank_matrix` for\nmore details.\n\nThe output is generated by applying a (potentially biased) random linear\nregression model with `n_informative` nonzero regressors to the previously\ngenerated input and some gaussian centered noise with some adjustable\nscale.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=100\n The number of features.\n\nn_informative : int, default=10\n The number of informative features, i.e., the number of features used\n to build the linear model used to generate the output.\n\nn_targets : int, default=1\n The number of regression targets, i.e., the dimension of the y output\n vector associated with a sample. By default, the output is a scalar.\n\nbias : float, default=0.0\n The bias term in the underlying linear model.\n\neffective_rank : int, default=None\n if not None:\n The approximate number of singular vectors required to explain most\n of the input data by linear combinations. 
Using this kind of\n singular spectrum in the input allows the generator to reproduce\n the correlations often observed in practice.\n if None:\n The input set is well conditioned, centered and gaussian with\n unit variance.\n\ntail_strength : float, default=0.5\n The relative importance of the fat noisy tail of the singular values\n profile if `effective_rank` is not None. When a float, it should be\n between 0 and 1.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\nshuffle : bool, default=True\n Shuffle the samples and the features.\n\ncoef : bool, default=False\n If True, the coefficients of the underlying linear model are returned.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The input samples.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n The output values.\n\ncoef : ndarray of shape (n_features,) or (n_features, n_targets)\n The coefficient of the underlying linear model. It is returned only if\n coef is True." - }, - { - "name": "make_circles", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "Union[Tuple[], int]", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "If int, it is the total number of points generated. For odd numbers, the inner circle will have one point more than the outer circle. If two-element tuple, number of points in outer circle and inner circle. .. versionchanged:: 0.23 Added two-element tuple." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle the samples." 
- }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Standard deviation of Gaussian noise added to the data." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling and noise. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "factor", - "type": "float", - "hasDefault": true, - "default": ".", - "limitation": null, - "ignored": false, - "docstring": "Scale factor between inner and outer circle in the range `(0, 1)`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make a large circle containing a smaller circle in 2d.\n\nA simple toy dataset to visualize clustering and classification\nalgorithms.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int or tuple of shape (2,), dtype=int, default=100\n If int, it is the total number of points generated.\n For odd numbers, the inner circle will have one point more than the\n outer circle.\n If two-element tuple, number of points in outer circle and inner\n circle.\n\n .. 
versionchanged:: 0.23\n Added two-element tuple.\n\nshuffle : bool, default=True\n Whether to shuffle the samples.\n\nnoise : float, default=None\n Standard deviation of Gaussian noise added to the data.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling and noise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nfactor : float, default=.8\n Scale factor between inner and outer circle in the range `(0, 1)`.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 2)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels (0 or 1) for class membership of each sample." - }, - { - "name": "make_moons", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "Union[Tuple[], int]", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "If int, the total number of points generated. If two-element tuple, number of points in each of two moons. .. versionchanged:: 0.23 Added two-element tuple." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle the samples." - }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Standard deviation of Gaussian noise added to the data." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling and noise. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make two interleaving half circles.\n\nA simple toy dataset to visualize clustering and classification\nalgorithms. Read more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int or tuple of shape (2,), dtype=int, default=100\n If int, the total number of points generated.\n If two-element tuple, number of points in each of two moons.\n\n .. versionchanged:: 0.23\n Added two-element tuple.\n\nshuffle : bool, default=True\n Whether to shuffle the samples.\n\nnoise : float, default=None\n Standard deviation of Gaussian noise added to the data.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling and noise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 2)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels (0 or 1) for class membership of each sample." - }, - { - "name": "make_blobs", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "Union[ArrayLike, int]", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "If int, it is the total number of points equally divided among clusters. If array-like, each element of the sequence indicates the number of samples per cluster. .. versionchanged:: v0.20 one can now pass an array-like to the ``n_samples`` parameter" - }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of features for each sample." - }, - { - "name": "centers", - "type": "Union[NDArray, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of centers to generate, or the fixed center locations. 
If n_samples is an int and centers is None, 3 centers are generated. If n_samples is array-like, centers must be either None or an array of length equal to the length of n_samples." - }, - { - "name": "cluster_std", - "type": "Union[ArrayLike, float]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the clusters." - }, - { - "name": "center_box", - "type": "Tuple[float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The bounding box for each cluster center when centers are generated at random." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Shuffle the samples." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "return_centers", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, then return the centers of each cluster .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate isotropic Gaussian blobs for clustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int or array-like, default=100\n If int, it is the total number of points equally divided among\n clusters.\n If array-like, each element of the sequence indicates\n the number of samples per cluster.\n\n .. 
versionchanged:: v0.20\n one can now pass an array-like to the ``n_samples`` parameter\n\nn_features : int, default=2\n The number of features for each sample.\n\ncenters : int or ndarray of shape (n_centers, n_features), default=None\n The number of centers to generate, or the fixed center locations.\n If n_samples is an int and centers is None, 3 centers are generated.\n If n_samples is array-like, centers must be\n either None or an array of length equal to the length of n_samples.\n\ncluster_std : float or array-like of float, default=1.0\n The standard deviation of the clusters.\n\ncenter_box : tuple of float (min, max), default=(-10.0, 10.0)\n The bounding box for each cluster center when centers are\n generated at random.\n\nshuffle : bool, default=True\n Shuffle the samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nreturn_centers : bool, default=False\n If True, then return the centers of each cluster\n\n .. versionadded:: 0.23\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels for cluster membership of each sample.\n\ncenters : ndarray of shape (n_centers, n_features)\n The centers of each cluster. Only returned if\n ``return_centers=True``.\n\nExamples\n--------\n>>> from sklearn.datasets import make_blobs\n>>> X, y = make_blobs(n_samples=10, centers=3, n_features=2,\n... random_state=0)\n>>> print(X.shape)\n(10, 2)\n>>> y\narray([0, 0, 1, 0, 2, 2, 2, 1, 1, 0])\n>>> X, y = make_blobs(n_samples=[3, 3, 4], centers=None, n_features=2,\n... random_state=0)\n>>> print(X.shape)\n(10, 2)\n>>> y\narray([0, 1, 2, 0, 2, 2, 2, 1, 1, 0])\n\nSee Also\n--------\nmake_classification : A more intricate variant." 
- }, - { - "name": "make_friedman1", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The number of features. Should be at least 5." - }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the gaussian noise applied to the output." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset noise. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate the \"Friedman #1\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are independent features uniformly distributed on the interval\n[0, 1]. The output `y` is created according to the formula::\n\n y(X) = 10 * sin(pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - 0.5) ** 2 + 10 * X[:, 3] + 5 * X[:, 4] + noise * N(0, 1).\n\nOut of the `n_features` features, only 5 are actually used to compute\n`y`. The remaining features are independent of `y`.\n\nThe number of features has to be >= 5.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=10\n The number of features. Should be at least 5.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset noise. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] J. Friedman, \"Multivariate adaptive regression splines\", The Annals\n of Statistics 19 (1), pages 1-67, 1991.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning 24,\n pages 123-140, 1996." - }, - { - "name": "make_friedman2", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." - }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the gaussian noise applied to the output." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset noise. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate the \"Friedman #2\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are 4 independent features uniformly distributed on the\nintervals::\n\n 0 <= X[:, 0] <= 100,\n 40 * pi <= X[:, 1] <= 560 * pi,\n 0 <= X[:, 2] <= 1,\n 1 <= X[:, 3] <= 11.\n\nThe output `y` is created according to the formula::\n\n y(X) = (X[:, 0] ** 2 + (X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) ** 2) ** 0.5 + noise * N(0, 1).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset noise. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 4)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] J. Friedman, \"Multivariate adaptive regression splines\", The Annals\n of Statistics 19 (1), pages 1-67, 1991.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning 24,\n pages 123-140, 1996." - }, - { - "name": "make_friedman3", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." - }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the gaussian noise applied to the output." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset noise. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate the \"Friedman #3\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are 4 independent features uniformly distributed on the\nintervals::\n\n 0 <= X[:, 0] <= 100,\n 40 * pi <= X[:, 1] <= 560 * pi,\n 0 <= X[:, 2] <= 1,\n 1 <= X[:, 3] <= 11.\n\nThe output `y` is created according to the formula::\n\n y(X) = arctan((X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) / X[:, 0]) + noise * N(0, 1).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset noise. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 4)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] J. Friedman, \"Multivariate adaptive regression splines\", The Annals\n of Statistics 19 (1), pages 1-67, 1991.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning 24,\n pages 123-140, 1996." - }, - { - "name": "make_low_rank_matrix", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." 
- }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of features." - }, - { - "name": "effective_rank", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The approximate number of singular vectors required to explain most of the data by linear combinations." - }, - { - "name": "tail_strength", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The relative importance of the fat noisy tail of the singular values profile. The value should be between 0 and 1." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a mostly low rank matrix with bell-shaped singular values.\n\nMost of the variance can be explained by a bell-shaped curve of width\neffective_rank: the low rank part of the singular values profile is::\n\n (1 - tail_strength) * exp(-1.0 * (i / effective_rank) ** 2)\n\nThe remaining singular values' tail is fat, decreasing as::\n\n tail_strength * exp(-0.1 * i / effective_rank).\n\nThe low rank part of the profile can be considered the structured\nsignal part of the data while the tail can be considered the noisy\npart of the data that cannot be summarized by a low number of linear\ncomponents (singular vectors).\n\nThis kind of singular profiles is often seen in practice, for instance:\n - gray level pictures of faces\n - TF-IDF vectors of text documents crawled from the web\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=100\n The number of features.\n\neffective_rank : int, default=10\n The approximate number of singular vectors required to explain most of\n the data by linear combinations.\n\ntail_strength : float, default=0.5\n The relative importance of the fat noisy tail of the singular values\n profile. The value should be between 0 and 1.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The matrix." 
- }, - { - "name": "make_sparse_coded_signal", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to generate" - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components in the dictionary" - }, - { - "name": "n_features", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of features of the dataset to generate" - }, - { - "name": "n_nonzero_coefs", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of active (non-zero) coefficients in each sample" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a signal as a sparse combination of dictionary elements.\n\nReturns a matrix Y = DX, such as D is (n_features, n_components),\nX is (n_components, n_samples) and each column of X has exactly\nn_nonzero_coefs non-zero elements.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int\n Number of samples to generate\n\nn_components : int\n Number of components in the dictionary\n\nn_features : int\n Number of features of the dataset to generate\n\nn_nonzero_coefs : int\n Number of active (non-zero) coefficients in each sample\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ndata : ndarray of shape (n_features, n_samples)\n The encoded signal (Y).\n\ndictionary : ndarray of shape (n_features, n_components)\n The dictionary with normalized components (D).\n\ncode : ndarray of shape (n_components, n_samples)\n The sparse code such that each column of this matrix has exactly\n n_nonzero_coefs non-zero items (X)." - }, - { - "name": "make_sparse_uncorrelated", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The number of features." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a random regression problem with sparse uncorrelated design.\n\nThis dataset is described in Celeux et al [1]. as::\n\n X ~ N(0, 1)\n y(X) = X[:, 0] + 2 * X[:, 1] - 2 * X[:, 2] - 1.5 * X[:, 3]\n\nOnly the first 4 features are informative. The remaining features are\nuseless.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=10\n The number of features.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] G. Celeux, M. El Anbari, J.-M. Marin, C. P. Robert,\n \"Regularization in regression: comparing Bayesian and frequentist\n methods in a poorly informative situation\", 2009." - }, - { - "name": "make_spd_matrix", - "decorators": [], - "parameters": [ - { - "name": "n_dim", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The matrix dimension." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a random symmetric, positive-definite matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_dim : int\n The matrix dimension.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_dim, n_dim)\n The random symmetric, positive-definite matrix.\n\nSee Also\n--------\nmake_sparse_spd_matrix" - }, - { - "name": "make_sparse_spd_matrix", - "decorators": [], - "parameters": [ - { - "name": "dim", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The size of the random matrix to generate." 
- }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The probability that a coefficient is zero (see notes). Larger values enforce more sparsity. The value should be in the range 0 and 1." - }, - { - "name": "norm_diag", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to normalize the output matrix to make the leading diagonal elements all 1" - }, - { - "name": "smallest_coef", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The value of the smallest coefficient between 0 and 1." - }, - { - "name": "largest_coef", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The value of the largest coefficient between 0 and 1." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Generate a sparse symmetric definite positive matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndim : int, default=1\n The size of the random matrix to generate.\n\nalpha : float, default=0.95\n The probability that a coefficient is zero (see notes). Larger values\n enforce more sparsity. 
The value should be in the range 0 and 1.\n\nnorm_diag : bool, default=False\n Whether to normalize the output matrix to make the leading diagonal\n elements all 1\n\nsmallest_coef : float, default=0.1\n The value of the smallest coefficient between 0 and 1.\n\nlargest_coef : float, default=0.9\n The value of the largest coefficient between 0 and 1.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nprec : sparse matrix of shape (dim, dim)\n The generated matrix.\n\nNotes\n-----\nThe sparsity is actually imposed on the cholesky factor of the matrix.\nThus alpha does not translate directly into the filling fraction of\nthe matrix itself.\n\nSee Also\n--------\nmake_spd_matrix" - }, - { - "name": "make_swiss_roll", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of sample points on the S curve." - }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the gaussian noise." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a swiss roll dataset.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of sample points on the S curve.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 3)\n The points.\n\nt : ndarray of shape (n_samples,)\n The univariate position of the sample according to the main dimension\n of the points in the manifold.\n\nNotes\n-----\nThe algorithm is from Marsland [1].\n\nReferences\n----------\n.. [1] S. Marsland, \"Machine Learning: An Algorithmic Perspective\",\n Chapter 10, 2009.\n http://seat.massey.ac.nz/personal/s.r.marsland/Code/10/lle.py" - }, - { - "name": "make_s_curve", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of sample points on the S curve." - }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the gaussian noise." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate an S curve dataset.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of sample points on the S curve.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 3)\n The points.\n\nt : ndarray of shape (n_samples,)\n The univariate position of the sample according to the main dimension\n of the points in the manifold." - }, - { - "name": "make_gaussian_quantiles", - "decorators": [], - "parameters": [ - { - "name": "mean", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The mean of the multi-dimensional normal distribution. If None then use the origin (0, 0, ...)." - }, - { - "name": "cov", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The covariance matrix will be this value times the unit matrix. This dataset only produces symmetric normal distributions." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The total number of points equally divided among classes." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of features for each sample." 
- }, - { - "name": "n_classes", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The number of classes" - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Shuffle the samples." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate isotropic Gaussian and label samples by quantile.\n\nThis classification dataset is constructed by taking a multi-dimensional\nstandard normal distribution and defining classes separated by nested\nconcentric multi-dimensional spheres such that roughly equal numbers of\nsamples are in each class (quantiles of the :math:`\\chi^2` distribution).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nmean : ndarray of shape (n_features,), default=None\n The mean of the multi-dimensional normal distribution.\n If None then use the origin (0, 0, ...).\n\ncov : float, default=1.0\n The covariance matrix will be this value times the unit matrix. This\n dataset only produces symmetric normal distributions.\n\nn_samples : int, default=100\n The total number of points equally divided among classes.\n\nn_features : int, default=2\n The number of features for each sample.\n\nn_classes : int, default=3\n The number of classes\n\nshuffle : bool, default=True\n Shuffle the samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels for quantile membership of each sample.\n\nNotes\n-----\nThe dataset is from Zhu et al [1].\n\nReferences\n----------\n.. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009." - }, - { - "name": "_shuffle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "make_biclusters", - "decorators": [], - "parameters": [ - { - "name": "shape", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The shape of the result." - }, - { - "name": "n_clusters", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of biclusters." - }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the gaussian noise." - }, - { - "name": "minval", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Minimum value of a bicluster." - }, - { - "name": "maxval", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum value of a bicluster." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Shuffle the samples." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. 
Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate an array with constant block diagonal structure for\nbiclustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nshape : iterable of shape (n_rows, n_cols)\n The shape of the result.\n\nn_clusters : int\n The number of biclusters.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise.\n\nminval : int, default=10\n Minimum value of a bicluster.\n\nmaxval : int, default=100\n Maximum value of a bicluster.\n\nshuffle : bool, default=True\n Shuffle the samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape `shape`\n The generated array.\n\nrows : ndarray of shape (n_clusters, X.shape[0])\n The indicators for cluster membership of each row.\n\ncols : ndarray of shape (n_clusters, X.shape[1])\n The indicators for cluster membership of each column.\n\nReferences\n----------\n\n.. [1] Dhillon, I. S. (2001, August). Co-clustering documents and\n words using bipartite spectral graph partitioning. In Proceedings\n of the seventh ACM SIGKDD international conference on Knowledge\n discovery and data mining (pp. 269-274). ACM.\n\nSee Also\n--------\nmake_checkerboard" - }, - { - "name": "make_checkerboard", - "decorators": [], - "parameters": [ - { - "name": "shape", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The shape of the result." - }, - { - "name": "n_clusters", - "type": "Union[ArrayLike, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of row and column clusters." 
- }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the gaussian noise." - }, - { - "name": "minval", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Minimum value of a bicluster." - }, - { - "name": "maxval", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum value of a bicluster." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Shuffle the samples." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate an array with block checkerboard structure for\nbiclustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nshape : tuple of shape (n_rows, n_cols)\n The shape of the result.\n\nn_clusters : int or array-like or shape (n_row_clusters, n_column_clusters)\n The number of row and column clusters.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise.\n\nminval : int, default=10\n Minimum value of a bicluster.\n\nmaxval : int, default=100\n Maximum value of a bicluster.\n\nshuffle : bool, default=True\n Shuffle the samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape `shape`\n The generated array.\n\nrows : ndarray of shape (n_clusters, X.shape[0])\n The indicators for cluster membership of each row.\n\ncols : ndarray of shape (n_clusters, X.shape[1])\n The indicators for cluster membership of each column.\n\n\nReferences\n----------\n\n.. [1] Kluger, Y., Basri, R., Chang, J. T., & Gerstein, M. (2003).\n Spectral biclustering of microarray data: coclustering genes\n and conditions. Genome research, 13(4), 703-716.\n\nSee Also\n--------\nmake_biclusters" - } - ] - }, - { - "name": "sklearn.datasets._species_distributions", - "imports": [ - "from io import BytesIO", - "from os import makedirs", - "from os import remove", - "from os.path import exists", - "import logging", - "import numpy as np", - "import joblib", - "from None import get_data_home", - "from _base import _fetch_remote", - "from _base import RemoteFileMetadata", - "from utils import Bunch", - "from utils.validation import _deprecate_positional_args", - "from _base import _pkl_filepath" - ], - "classes": [], - "functions": [ - { - "name": "_load_coverage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load a coverage file from an open file object.\n\nThis will return a numpy array of the given dtype" - }, - { - "name": "_load_csv", - "decorators": [], - "parameters": [ - { - "name": "F", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "CSV file open in byte mode." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Load csv file.\n\nParameters\n----------\nF : file object\n CSV file open in byte mode.\n\nReturns\n-------\nrec : np.ndarray\n record array representing the data" - }, - { - "name": "construct_grids", - "decorators": [], - "parameters": [ - { - "name": "batch", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The object returned by :func:`fetch_species_distributions`" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Construct the map grid from the batch object\n\nParameters\n----------\nbatch : Batch object\n The object returned by :func:`fetch_species_distributions`\n\nReturns\n-------\n(xgrid, ygrid) : 1-D arrays\n The grid corresponding to the values in batch.coverages" - }, - { - "name": "fetch_species_distributions", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Loader for species distribution dataset from Phillips et. al. (2006)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. 
By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n coverages : array, shape = [14, 1592, 1212]\n These represent the 14 features measured\n at each point of the map grid.\n The latitude/longitude values for the grid are discussed below.\n Missing data is represented by the value -9999.\n train : record array, shape = (1624,)\n The training points for the data. Each point has three fields:\n\n - train['species'] is the species name\n - train['dd long'] is the longitude, in degrees\n - train['dd lat'] is the latitude, in degrees\n test : record array, shape = (620,)\n The test points for the data. Same format as the training data.\n Nx, Ny : integers\n The number of longitudes (x) and latitudes (y) in the grid\n x_left_lower_corner, y_left_lower_corner : floats\n The (x,y) position of the lower-left corner, in degrees\n grid_size : float\n The spacing between points of the grid, in degrees\n\nReferences\n----------\n\n* `\"Maximum entropy modeling of species geographic distributions\"\n `_\n S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,\n 190:231-259, 2006.\n\nNotes\n-----\n\nThis dataset represents the geographic distribution of species.\nThe dataset is provided by Phillips et. al. (2006).\n\nThe two species are:\n\n- `\"Bradypus variegatus\"\n `_ ,\n the Brown-throated Sloth.\n\n- `\"Microryzomys minutus\"\n `_ ,\n also known as the Forest Small Rice Rat, a rodent that lives in Peru,\n Colombia, Ecuador, Peru, and Venezuela.\n\n- For an example of using this dataset with scikit-learn, see\n :ref:`examples/applications/plot_species_distribution_modeling.py\n `." 
- } - ] - }, - { - "name": "sklearn.datasets._svmlight_format_io", - "imports": [ - "from contextlib import closing", - "import io", - "import os.path", - "import numpy as np", - "import scipy.sparse as sp", - "from None import __version__", - "from utils import check_array", - "from utils import IS_PYPY", - "from utils.validation import _deprecate_positional_args", - "from _svmlight_format_fast import _load_svmlight_file", - "import gzip", - "from bz2 import BZ2File" - ], - "classes": [], - "functions": [ - { - "name": "_load_svmlight_file", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "load_svmlight_file", - "decorators": [], - "parameters": [ - { - "name": "f", - "type": "Union[int, str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "(Path to) a file to load. If a path ends in \".gz\" or \".bz2\", it will be uncompressed on the fly. If an integer is passed, it is assumed to be a file descriptor. A file-like or file descriptor will not be closed by this function. A file-like object must be opened in binary mode." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features to use. If None, it will be inferred. This argument is useful to load several files that are subsets of a bigger sliced dataset: each subset might not have examples of every feature, hence the inferred shape might vary from one slice to another. n_features is only required if ``offset`` or ``length`` are passed a non-default value." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Data type of dataset to be loaded. This will be the data type of the output numpy arrays ``X`` and ``y``." 
- }, - { - "name": "multilabel", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Samples may have several labels each (see https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)" - }, - { - "name": "zero_based", - "type": "Union[Literal[\"auto\"], bool]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "Whether column indices in f are zero-based (True) or one-based (False). If column indices are one-based, they are transformed to zero-based to match Python/NumPy conventions. If set to \"auto\", a heuristic check is applied to determine this from the file contents. Both kinds of files occur \"in the wild\", but they are unfortunately not self-identifying. Using \"auto\" or True should always be safe when no ``offset`` or ``length`` is passed. If ``offset`` or ``length`` are passed, the \"auto\" mode falls back to ``zero_based=True`` to avoid having the heuristic check yield inconsistent results on different segments of the file." - }, - { - "name": "query_id", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the query_id array for each file." - }, - { - "name": "offset", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Ignore the offset first bytes by seeking forward, then discarding the following bytes up until the next new line character." - }, - { - "name": "length", - "type": "int", - "hasDefault": true, - "default": "-1", - "limitation": null, - "ignored": false, - "docstring": "If strictly positive, stop reading any new line of data once the position in the file has reached the (offset + length) bytes threshold." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load datasets in the svmlight / libsvm format into sparse CSR matrix\n\nThis format is a text-based format, with one sample per line. It does\nnot store zero valued features hence is suitable for sparse dataset.\n\nThe first element of each line can be used to store a target variable\nto predict.\n\nThis format is used as the default format for both svmlight and the\nlibsvm command line programs.\n\nParsing a text based source can be expensive. When working on\nrepeatedly on the same dataset, it is recommended to wrap this\nloader with joblib.Memory.cache to store a memmapped backup of the\nCSR results of the first call and benefit from the near instantaneous\nloading of memmapped structures for the subsequent calls.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.\n\nThis implementation is written in Cython and is reasonably fast.\nHowever, a faster API-compatible loader is also available at:\n\n https://github.com/mblondel/svmlight-loader\n\nParameters\n----------\nf : str, file-like or int\n (Path to) a file to load. If a path ends in \".gz\" or \".bz2\", it will\n be uncompressed on the fly. If an integer is passed, it is assumed to\n be a file descriptor. A file-like or file descriptor will not be closed\n by this function. A file-like object must be opened in binary mode.\n\nn_features : int, default=None\n The number of features to use. If None, it will be inferred. 
This\n argument is useful to load several files that are subsets of a\n bigger sliced dataset: each subset might not have examples of\n every feature, hence the inferred shape might vary from one\n slice to another.\n n_features is only required if ``offset`` or ``length`` are passed a\n non-default value.\n\ndtype : numpy data type, default=np.float64\n Data type of dataset to be loaded. This will be the data type of the\n output numpy arrays ``X`` and ``y``.\n\nmultilabel : bool, default=False\n Samples may have several labels each (see\n https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\nzero_based : bool or \"auto\", default=\"auto\"\n Whether column indices in f are zero-based (True) or one-based\n (False). If column indices are one-based, they are transformed to\n zero-based to match Python/NumPy conventions.\n If set to \"auto\", a heuristic check is applied to determine this from\n the file contents. Both kinds of files occur \"in the wild\", but they\n are unfortunately not self-identifying. Using \"auto\" or True should\n always be safe when no ``offset`` or ``length`` is passed.\n If ``offset`` or ``length`` are passed, the \"auto\" mode falls back\n to ``zero_based=True`` to avoid having the heuristic check yield\n inconsistent results on different segments of the file.\n\nquery_id : bool, default=False\n If True, will return the query_id array for each file.\n\noffset : int, default=0\n Ignore the offset first bytes by seeking forward, then\n discarding the following bytes up until the next new line\n character.\n\nlength : int, default=-1\n If strictly positive, stop reading any new line of data once the\n position in the file has reached the (offset + length) bytes threshold.\n\nReturns\n-------\nX : scipy.sparse matrix of shape (n_samples, n_features)\n\ny : ndarray of shape (n_samples,), or, in the multilabel a list of\n tuples of length n_samples.\n\nquery_id : array of shape (n_samples,)\n query_id for each sample. 
Only returned when query_id is set to\n True.\n\nSee Also\n--------\nload_svmlight_files : Similar function for loading multiple files in this\n format, enforcing the same number of features/columns on all of them.\n\nExamples\n--------\nTo use joblib.Memory to cache the svmlight file::\n\n from joblib import Memory\n from .datasets import load_svmlight_file\n mem = Memory(\"./mycache\")\n\n @mem.cache\n def get_data():\n data = load_svmlight_file(\"mysvmlightfile\")\n return data[0], data[1]\n\n X, y = get_data()" - }, - { - "name": "_gen_open", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_open_and_load", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "load_svmlight_files", - "decorators": [], - "parameters": [ - { - "name": "files", - "type": "Union[int, ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "(Paths of) files to load. If a path ends in \".gz\" or \".bz2\", it will be uncompressed on the fly. If an integer is passed, it is assumed to be a file descriptor. File-likes and file descriptors will not be closed by this function. File-like objects must be opened in binary mode." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features to use. If None, it will be inferred from the maximum column index occurring in any of the files. This can be set to a higher value than the actual number of features in any of the input files, but setting it to a lower value will cause an exception to be raised." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Data type of dataset to be loaded. 
This will be the data type of the output numpy arrays ``X`` and ``y``." - }, - { - "name": "multilabel", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Samples may have several labels each (see https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)" - }, - { - "name": "zero_based", - "type": "Union[Literal[\"auto\"], bool]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "Whether column indices in f are zero-based (True) or one-based (False). If column indices are one-based, they are transformed to zero-based to match Python/NumPy conventions. If set to \"auto\", a heuristic check is applied to determine this from the file contents. Both kinds of files occur \"in the wild\", but they are unfortunately not self-identifying. Using \"auto\" or True should always be safe when no offset or length is passed. If offset or length are passed, the \"auto\" mode falls back to zero_based=True to avoid having the heuristic check yield inconsistent results on different segments of the file." - }, - { - "name": "query_id", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the query_id array for each file." - }, - { - "name": "offset", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Ignore the offset first bytes by seeking forward, then discarding the following bytes up until the next new line character." - }, - { - "name": "length", - "type": "int", - "hasDefault": true, - "default": "-1", - "limitation": null, - "ignored": false, - "docstring": "If strictly positive, stop reading any new line of data once the position in the file has reached the (offset + length) bytes threshold." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load dataset from multiple files in SVMlight format\n\nThis function is equivalent to mapping load_svmlight_file over a list of\nfiles, except that the results are concatenated into a single, flat list\nand the samples vectors are constrained to all have the same number of\nfeatures.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.\n\nParameters\n----------\nfiles : array-like, dtype=str, file-like or int\n (Paths of) files to load. If a path ends in \".gz\" or \".bz2\", it will\n be uncompressed on the fly. If an integer is passed, it is assumed to\n be a file descriptor. File-likes and file descriptors will not be\n closed by this function. File-like objects must be opened in binary\n mode.\n\nn_features : int, default=None\n The number of features to use. If None, it will be inferred from the\n maximum column index occurring in any of the files.\n\n This can be set to a higher value than the actual number of features\n in any of the input files, but setting it to a lower value will cause\n an exception to be raised.\n\ndtype : numpy data type, default=np.float64\n Data type of dataset to be loaded. This will be the data type of the\n output numpy arrays ``X`` and ``y``.\n\nmultilabel : bool, default=False\n Samples may have several labels each (see\n https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\nzero_based : bool or \"auto\", default=\"auto\"\n Whether column indices in f are zero-based (True) or one-based\n (False). 
If column indices are one-based, they are transformed to\n zero-based to match Python/NumPy conventions.\n If set to \"auto\", a heuristic check is applied to determine this from\n the file contents. Both kinds of files occur \"in the wild\", but they\n are unfortunately not self-identifying. Using \"auto\" or True should\n always be safe when no offset or length is passed.\n If offset or length are passed, the \"auto\" mode falls back\n to zero_based=True to avoid having the heuristic check yield\n inconsistent results on different segments of the file.\n\nquery_id : bool, default=False\n If True, will return the query_id array for each file.\n\noffset : int, default=0\n Ignore the offset first bytes by seeking forward, then\n discarding the following bytes up until the next new line\n character.\n\nlength : int, default=-1\n If strictly positive, stop reading any new line of data once the\n position in the file has reached the (offset + length) bytes threshold.\n\nReturns\n-------\n[X1, y1, ..., Xn, yn]\nwhere each (Xi, yi) pair is the result from load_svmlight_file(files[i]).\n\nIf query_id is set to True, this will return instead [X1, y1, q1,\n..., Xn, yn, qn] where (Xi, yi, qi) is the result from\nload_svmlight_file(files[i])\n\nNotes\n-----\nWhen fitting a model to a matrix X_train and evaluating it against a\nmatrix X_test, it is essential that X_train and X_test have the same\nnumber of features (X_train.shape[1] == X_test.shape[1]). 
This may not\nbe the case if you load the files individually with load_svmlight_file.\n\nSee Also\n--------\nload_svmlight_file" - }, - { - "name": "_dump_svmlight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "dump_svmlight_file", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. Class labels must be an integer or float, or array-like objects of integer or float for multilabel classifications." - }, - { - "name": "f", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If string, specifies the path that will contain the data. If file-like, data will be written to f. f should be opened in binary mode." - }, - { - "name": "zero_based", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether column indices should be written zero-based (True) or one-based (False)." - }, - { - "name": "comment", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Comment to insert at the top of the file. This should be either a Unicode string, which will be encoded as UTF-8, or an ASCII byte string. If a comment is given, then it will be preceded by one that identifies the file as having been dumped by scikit-learn. Note that not all tools grok comments in SVMlight files." 
- }, - { - "name": "query_id", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array containing pairwise preference constraints (qid in svmlight format)." - }, - { - "name": "multilabel", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Samples may have several labels each (see https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html) .. versionadded:: 0.17 parameter *multilabel* to support multilabel datasets." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Dump the dataset in svmlight / libsvm file format.\n\nThis format is a text-based format, with one sample per line. It does\nnot store zero valued features hence is suitable for sparse dataset.\n\nThe first element of each line can be used to store a target variable\nto predict.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : {array-like, sparse matrix}, shape = [n_samples (, n_labels)]\n Target values. Class labels must be an\n integer or float, or array-like objects of integer or float for\n multilabel classifications.\n\nf : string or file-like in binary mode\n If string, specifies the path that will contain the data.\n If file-like, data will be written to f. f should be opened in binary\n mode.\n\nzero_based : boolean, default=True\n Whether column indices should be written zero-based (True) or one-based\n (False).\n\ncomment : string, default=None\n Comment to insert at the top of the file. This should be either a\n Unicode string, which will be encoded as UTF-8, or an ASCII byte\n string.\n If a comment is given, then it will be preceded by one that identifies\n the file as having been dumped by scikit-learn. 
Note that not all\n tools grok comments in SVMlight files.\n\nquery_id : array-like of shape (n_samples,), default=None\n Array containing pairwise preference constraints (qid in svmlight\n format).\n\nmultilabel : boolean, default=False\n Samples may have several labels each (see\n https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\n .. versionadded:: 0.17\n parameter *multilabel* to support multilabel datasets." - } - ] - }, - { - "name": "sklearn.datasets._twenty_newsgroups", - "imports": [ - "import os", - "from os.path import dirname", - "from os.path import join", - "import logging", - "import tarfile", - "import pickle", - "import shutil", - "import re", - "import codecs", - "import numpy as np", - "import scipy.sparse as sp", - "import joblib", - "from None import get_data_home", - "from None import load_files", - "from _base import _convert_data_dataframe", - "from _base import _pkl_filepath", - "from _base import _fetch_remote", - "from _base import RemoteFileMetadata", - "from feature_extraction.text import CountVectorizer", - "from None import preprocessing", - "from utils import check_random_state", - "from utils import Bunch", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "_download_20newsgroups", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Download the 20 newsgroups data and stored it as a zipped pickle." - }, - { - "name": "strip_newsgroup_header", - "decorators": [], - "parameters": [ - { - "name": "text", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The text from which to remove the signature block." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Given text in \"news\" format, strip the headers, by removing everything\nbefore the first blank line.\n\nParameters\n----------\ntext : str\n The text from which to remove the signature block." - }, - { - "name": "strip_newsgroup_quoting", - "decorators": [], - "parameters": [ - { - "name": "text", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The text from which to remove the signature block." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Given text in \"news\" format, strip lines beginning with the quote\ncharacters > or |, plus lines that often introduce a quoted section\n(for example, because they contain the string 'writes:'.)\n\nParameters\n----------\ntext : str\n The text from which to remove the signature block." - }, - { - "name": "strip_newsgroup_footer", - "decorators": [], - "parameters": [ - { - "name": "text", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The text from which to remove the signature block." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Given text in \"news\" format, attempt to remove a signature block.\n\nAs a rough heuristic, we assume that signatures are set apart by either\na blank line or a line made of hyphens, and that it is the last such line\nin the file (disregarding blank lines at the end).\n\nParameters\n----------\ntext : str\n The text from which to remove the signature block." - }, - { - "name": "fetch_20newsgroups", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify a download and cache folder for the datasets. If None, all scikit-learn data is stored in '~/scikit_learn_data' subfolders." 
- }, - { - "name": "subset", - "type": "Literal['train', 'test', 'all']", - "hasDefault": true, - "default": "'train'", - "limitation": null, - "ignored": false, - "docstring": "Select the dataset to load: 'train' for the training set, 'test' for the test set, 'all' for both, with shuffled ordering." - }, - { - "name": "categories", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If None (default), load all the categories. If not None, list of category names to load (other categories ignored)." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to shuffle the data: might be important for models that make the assumption that the samples are independent and identically distributed (i.i.d.), such as stochastic gradient descent." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "remove", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "May contain any subset of ('headers', 'footers', 'quotes'). Each of these are kinds of text that will be detected and removed from the newsgroup posts, preventing classifiers from overfitting on metadata. 'headers' removes newsgroup headers, 'footers' removes blocks at the ends of posts that look like signatures, and 'quotes' removes lines that appear to be quoting another post. 'headers' follows an exact standard; the other filters are not always correct." 
- }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise an IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns `(data.data, data.target)` instead of a Bunch object. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load the filenames and data from the 20 newsgroups dataset (classification).\n\nDownload it if necessary.\n\n================= ==========\nClasses 20\nSamples total 18846\nDimensionality 1\nFeatures text\n================= ==========\n\nRead more in the :ref:`User Guide <20newsgroups_dataset>`.\n\nParameters\n----------\ndata_home : str, default=None\n Specify a download and cache folder for the datasets. If None,\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\nsubset : {'train', 'test', 'all'}, default='train'\n Select the dataset to load: 'train' for the training set, 'test'\n for the test set, 'all' for both, with shuffled ordering.\n\ncategories : array-like, dtype=str or unicode, default=None\n If None (default), load all the categories.\n If not None, list of category names to load (other categories\n ignored).\n\nshuffle : bool, default=True\n Whether or not to shuffle the data: might be important for models that\n make the assumption that the samples are independent and identically\n distributed (i.i.d.), such as stochastic gradient descent.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nremove : tuple, default=()\n May contain any subset of ('headers', 'footers', 'quotes'). Each of\n these are kinds of text that will be detected and removed from the\n newsgroup posts, preventing classifiers from overfitting on\n metadata.\n\n 'headers' removes newsgroup headers, 'footers' removes blocks at the\n ends of posts that look like signatures, and 'quotes' removes lines\n that appear to be quoting another post.\n\n 'headers' follows an exact standard; the other filters are not always\n correct.\n\ndownload_if_missing : bool, default=True\n If False, raise an IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns `(data.data, data.target)` instead of a Bunch\n object.\n\n .. versionadded:: 0.22\n\nReturns\n-------\nbunch : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : list of shape (n_samples,)\n The data list to learn.\n target: ndarray of shape (n_samples,)\n The target labels.\n filenames: list of shape (n_samples,)\n The path to the location of the data.\n DESCR: str\n The full description of the dataset.\n target_names: list of shape (n_classes,)\n The names of target classes.\n\n(data, target) : tuple if `return_X_y=True`\n .. versionadded:: 0.22" - }, - { - "name": "fetch_20newsgroups_vectorized", - "decorators": [], - "parameters": [ - { - "name": "subset", - "type": "Literal['train', 'test', 'all']", - "hasDefault": true, - "default": "'train'", - "limitation": null, - "ignored": false, - "docstring": "Select the dataset to load: 'train' for the training set, 'test' for the test set, 'all' for both, with shuffled ordering." 
- }, - { - "name": "remove", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "May contain any subset of ('headers', 'footers', 'quotes'). Each of these are kinds of text that will be detected and removed from the newsgroup posts, preventing classifiers from overfitting on metadata. 'headers' removes newsgroup headers, 'footers' removes blocks at the ends of posts that look like signatures, and 'quotes' removes lines that appear to be quoting another post." - }, - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify an download and cache folder for the datasets. If None, all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise an IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data.data, data.target)`` instead of a Bunch object. .. versionadded:: 0.20" - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, normalizes each document's feature vector to unit norm using :func:`sklearn.preprocessing.normalize`. .. versionadded:: 0.22" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric, string, or categorical). The target is a pandas DataFrame or Series depending on the number of `target_columns`. 
.. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load and vectorize the 20 newsgroups dataset (classification).\n\nDownload it if necessary.\n\nThis is a convenience function; the transformation is done using the\ndefault settings for\n:class:`~sklearn.feature_extraction.text.CountVectorizer`. For more\nadvanced usage (stopword filtering, n-gram extraction, etc.), combine\nfetch_20newsgroups with a custom\n:class:`~sklearn.feature_extraction.text.CountVectorizer`,\n:class:`~sklearn.feature_extraction.text.HashingVectorizer`,\n:class:`~sklearn.feature_extraction.text.TfidfTransformer` or\n:class:`~sklearn.feature_extraction.text.TfidfVectorizer`.\n\nThe resulting counts are normalized using\n:func:`sklearn.preprocessing.normalize` unless normalize is set to False.\n\n================= ==========\nClasses 20\nSamples total 18846\nDimensionality 130107\nFeatures real\n================= ==========\n\nRead more in the :ref:`User Guide <20newsgroups_dataset>`.\n\nParameters\n----------\nsubset : {'train', 'test', 'all'}, default='train'\n Select the dataset to load: 'train' for the training set, 'test'\n for the test set, 'all' for both, with shuffled ordering.\n\nremove : tuple, default=()\n May contain any subset of ('headers', 'footers', 'quotes'). Each of\n these are kinds of text that will be detected and removed from the\n newsgroup posts, preventing classifiers from overfitting on\n metadata.\n\n 'headers' removes newsgroup headers, 'footers' removes blocks at the\n ends of posts that look like signatures, and 'quotes' removes lines\n that appear to be quoting another post.\n\ndata_home : str, default=None\n Specify an download and cache folder for the datasets. 
If None,\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise an IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data.data, data.target)`` instead of a Bunch\n object.\n\n .. versionadded:: 0.20\n\nnormalize : bool, default=True\n If True, normalizes each document's feature vector to unit norm using\n :func:`sklearn.preprocessing.normalize`.\n\n .. versionadded:: 0.22\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string, or categorical). The target is\n a pandas DataFrame or Series depending on the number of\n `target_columns`.\n\n .. versionadded:: 0.24\n\nReturns\n-------\nbunch : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data: {sparse matrix, dataframe} of shape (n_samples, n_features)\n The input data matrix. If ``as_frame`` is `True`, ``data`` is\n a pandas DataFrame with sparse columns.\n target: {ndarray, series} of shape (n_samples,)\n The target labels. If ``as_frame`` is `True`, ``target`` is a\n pandas Series.\n target_names: list of shape (n_classes,)\n The names of target classes.\n DESCR: str\n The full description of the dataset.\n frame: dataframe of shape (n_samples, n_features + 1)\n Only present when `as_frame=True`. Pandas DataFrame with ``data``\n and ``target``.\n\n .. versionadded:: 0.24\n\n(data, target) : tuple if ``return_X_y`` is True\n `data` and `target` would be of the format defined in the `Bunch`\n description above.\n\n .. 
versionadded:: 0.20" - } - ] - }, - { - "name": "sklearn.datasets", - "imports": [ - "from _base import load_breast_cancer", - "from _base import load_boston", - "from _base import load_diabetes", - "from _base import load_digits", - "from _base import load_files", - "from _base import load_iris", - "from _base import load_linnerud", - "from _base import load_sample_images", - "from _base import load_sample_image", - "from _base import load_wine", - "from _base import get_data_home", - "from _base import clear_data_home", - "from _covtype import fetch_covtype", - "from _kddcup99 import fetch_kddcup99", - "from _lfw import fetch_lfw_pairs", - "from _lfw import fetch_lfw_people", - "from _twenty_newsgroups import fetch_20newsgroups", - "from _twenty_newsgroups import fetch_20newsgroups_vectorized", - "from _openml import fetch_openml", - "from _samples_generator import make_classification", - "from _samples_generator import make_multilabel_classification", - "from _samples_generator import make_hastie_10_2", - "from _samples_generator import make_regression", - "from _samples_generator import make_blobs", - "from _samples_generator import make_moons", - "from _samples_generator import make_circles", - "from _samples_generator import make_friedman1", - "from _samples_generator import make_friedman2", - "from _samples_generator import make_friedman3", - "from _samples_generator import make_low_rank_matrix", - "from _samples_generator import make_sparse_coded_signal", - "from _samples_generator import make_sparse_uncorrelated", - "from _samples_generator import make_spd_matrix", - "from _samples_generator import make_swiss_roll", - "from _samples_generator import make_s_curve", - "from _samples_generator import make_sparse_spd_matrix", - "from _samples_generator import make_gaussian_quantiles", - "from _samples_generator import make_biclusters", - "from _samples_generator import make_checkerboard", - "from _svmlight_format_io import load_svmlight_file", - "from 
_svmlight_format_io import load_svmlight_files", - "from _svmlight_format_io import dump_svmlight_file", - "from _olivetti_faces import fetch_olivetti_faces", - "from _species_distributions import fetch_species_distributions", - "from _california_housing import fetch_california_housing", - "from _rcv1 import fetch_rcv1" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.datasets.tests.conftest", - "imports": [ - "import builtins", - "import pytest" - ], - "classes": [], - "functions": [ - { - "name": "hide_available_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Pretend pandas was not installed. " - } - ] - }, - { - "name": "sklearn.datasets.tests.test_20news", - "imports": [ - "from functools import partial", - "from unittest.mock import patch", - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "from sklearn.datasets.tests.test_common import check_as_frame", - "from sklearn.datasets.tests.test_common import check_pandas_dependency_message", - "from sklearn.datasets.tests.test_common import check_return_X_y", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.preprocessing import normalize" - ], - "classes": [], - "functions": [ - { - "name": "test_20news", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_20news_length_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Checks the length consistencies within the bunch\n\nThis is a non-regression test for a bug present in 0.16.1." 
- }, - { - "name": "test_20news_vectorized", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_20news_normalization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_20news_as_frame", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_as_frame_no_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_outdated_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_base", - "imports": [ - "import os", - "import shutil", - "import tempfile", - "import warnings", - "from pickle import loads", - "from pickle import dumps", - "from functools import partial", - "import pytest", - "import numpy as np", - "from sklearn.datasets import get_data_home", - "from sklearn.datasets import clear_data_home", - "from sklearn.datasets import load_files", - "from sklearn.datasets import load_sample_images", - "from sklearn.datasets import load_sample_image", - "from sklearn.datasets import load_digits", - "from sklearn.datasets import load_diabetes", - "from sklearn.datasets import load_linnerud", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import load_breast_cancer", - "from sklearn.datasets import load_boston", - "from sklearn.datasets import load_wine", - "from sklearn.utils import Bunch", - "from sklearn.datasets.tests.test_common import check_as_frame", - "from sklearn.externals._pilutil import pillow_installed", - "from sklearn.utils import IS_PYPY" - ], - "classes": [], - "functions": [ - { - "name": "_remove_dir", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", 
- "docstring": null - }, - { - "name": "data_home", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "load_files_root", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_category_dir_1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_category_dir_2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_data_home", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_default_empty_load_files", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_default_load_files", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_files_w_categories_desc_and_encoding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_files_wo_load_content", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_sample_images", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_sample_image", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_missing_sample_image_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_loader", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_toy_dataset_frame_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_loads_dumps_bunch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bunch_pickle_generated_with_0_16_and_read_with_0_17", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bunch_dir", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_california_housing", - "imports": [ - "import pytest", - "from sklearn.datasets.tests.test_common import check_return_X_y", - "from functools import partial" - ], - "classes": [], - "functions": [ - { - "name": "test_fetch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_asframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pandas_dependency_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_common", - "imports": [ - "import inspect", - "import os", - "import pytest", - "import numpy as np", - "import sklearn.datasets", - "import PIL", - "import pandas" - ], - "classes": [], - "functions": [ - { - "name": "is_pillow_installed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pandas_dependency_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_return_X_y", 
- "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_as_frame", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_skip_network_tests", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_generate_func_supporting_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_common_check_return_X_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_common_check_as_frame", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_common_check_pandas_dependency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_covtype", - "imports": [ - "from functools import partial", - "import pytest", - "from sklearn.datasets.tests.test_common import check_return_X_y" - ], - "classes": [], - "functions": [ - { - "name": "test_fetch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_asframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pandas_dependency_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_kddcup99", - "imports": [ - "from functools import partial", - "import pytest", - "from sklearn.datasets.tests.test_common import check_as_frame", - "from sklearn.datasets.tests.test_common import 
check_pandas_dependency_message", - "from sklearn.datasets.tests.test_common import check_return_X_y" - ], - "classes": [], - "functions": [ - { - "name": "test_fetch_kddcup99_percent10", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_kddcup99_return_X_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_kddcup99_as_frame", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_kddcup99_shuffle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pandas_dependency_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_lfw", - "imports": [ - "import random", - "import os", - "import shutil", - "import tempfile", - "import numpy as np", - "import pytest", - "from functools import partial", - "from sklearn.externals._pilutil import pillow_installed", - "from sklearn.externals._pilutil import imsave", - "from sklearn.datasets import fetch_lfw_pairs", - "from sklearn.datasets import fetch_lfw_people", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import SkipTest", - "from sklearn.datasets.tests.test_common import check_return_X_y" - ], - "classes": [], - "functions": [ - { - "name": "setup_module", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test fixture run once and common to all tests of this module" - }, - { - "name": "teardown_module", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test fixture (clean up) run once after all tests of this 
module" - }, - { - "name": "test_load_empty_lfw_people", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_fake_lfw_people", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_fake_lfw_people_too_restrictive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_empty_lfw_pairs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_fake_lfw_pairs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_olivetti_faces", - "imports": [ - "import numpy as np", - "from sklearn.utils import Bunch", - "from sklearn.datasets.tests.test_common import check_return_X_y", - "from sklearn.utils._testing import assert_array_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_olivetti_faces", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_openml", - "imports": [ - "import gzip", - "import json", - "import numpy as np", - "import os", - "import re", - "import scipy.sparse", - "import sklearn", - "import pytest", - "from sklearn import config_context", - "from sklearn.datasets import fetch_openml", - "from sklearn.datasets._openml import _open_openml_url", - "from sklearn.datasets._openml import _arff", - "from sklearn.datasets._openml import _DATA_FILE", - "from sklearn.datasets._openml import _convert_arff_data", - "from sklearn.datasets._openml import _convert_arff_data_dataframe", - "from sklearn.datasets._openml import _get_data_description_by_id", - "from sklearn.datasets._openml import 
_get_local_path", - "from sklearn.datasets._openml import _retry_with_clean_cache", - "from sklearn.datasets._openml import _feature_to_dtype", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils import is_scalar_nan", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_equal", - "from urllib.error import HTTPError", - "from sklearn.datasets.tests.test_common import check_return_X_y", - "from sklearn.externals._arff import ArffContainerType", - "from functools import partial", - "from sklearn.utils._testing import fails_if_pypy" - ], - "classes": [ - { - "name": "_MockHTTPResponse", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "read", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "close", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "info", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__iter__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__enter__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__exit__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "_test_features_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"_fetch_dataset_from_openml", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_monkey_patch_webbased_functions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_to_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_to_dtype_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_iris_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_iris_pandas_equal_to_no_frame", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_iris_multitarget_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_anneal_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_cpu_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_australian_pandas_error_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_as_frame_auto", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_convert_arff_data_dataframe_warning_low_memory_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "test_fetch_openml_adultcensus_pandas_return_X_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_adultcensus_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_miceprotein_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_emotions_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_titanic_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decode_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_iris_multitarget", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_anneal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decode_anneal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_anneal_multitarget", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_cpu", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decode_cpu", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_australian", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_adultcensus", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_miceprotein", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_emotions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decode_emotions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_open_openml_url_cache", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_open_openml_url_unlinks_local_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_retry_with_clean_cache", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_retry_with_clean_cache_http_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_cache", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_notarget", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_inactive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_fetch_nonexiting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises_illegal_multitarget", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warn_ignore_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_string_attribute_without_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dataset_with_openml_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dataset_with_openml_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_illegal_column", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_raises_missing_values_target", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_raises_illegal_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_with_ignored_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_verify_checksum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_convert_arff_data_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - 
{ - "name": "sklearn.datasets.tests.test_rcv1", - "imports": [ - "import scipy.sparse as sp", - "import numpy as np", - "from functools import partial", - "from sklearn.datasets.tests.test_common import check_return_X_y", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_fetch_rcv1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_samples_generator", - "imports": [ - "from collections import defaultdict", - "from functools import partial", - "import numpy as np", - "import pytest", - "import scipy.sparse as sp", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_multilabel_classification", - "from sklearn.datasets import make_hastie_10_2", - "from sklearn.datasets import make_regression", - "from sklearn.datasets import make_blobs", - "from sklearn.datasets import make_friedman1", - "from sklearn.datasets import make_friedman2", - "from sklearn.datasets import make_friedman3", - "from sklearn.datasets import make_low_rank_matrix", - "from sklearn.datasets import make_moons", - "from sklearn.datasets import make_circles", - "from sklearn.datasets import make_sparse_coded_signal", - "from sklearn.datasets import make_sparse_uncorrelated", - "from sklearn.datasets import make_spd_matrix", - "from sklearn.datasets import make_swiss_roll", - "from sklearn.datasets import make_s_curve", - "from sklearn.datasets import make_biclusters", - "from sklearn.datasets import make_checkerboard", - "from sklearn.utils.validation import assert_all_finite", - 
"from numpy.linalg import svd", - "from numpy.linalg import eig" - ], - "classes": [], - "functions": [ - { - "name": "test_make_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_classification_informative_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test the construction of informative features in make_classification\n\nAlso tests `n_clusters_per_class`, `n_classes`, `hypercube` and\nfully-specified `weights`." - }, - { - "name": "test_make_classification_weights_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_classification_weights_array_or_list_ok", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_multilabel_classification_return_sequences", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_multilabel_classification_return_indicator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_multilabel_classification_return_indicator_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_multilabel_classification_valid_arguments", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_hastie_10_2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { 
- "name": "test_make_regression_multitarget", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_blobs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_blobs_n_samples_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_blobs_n_samples_list_with_centers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_blobs_n_samples_centers_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_blobs_return_centers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_blobs_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_friedman1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_friedman2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_friedman3", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_low_rank_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_sparse_coded_signal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_sparse_uncorrelated", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_spd_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_swiss_roll", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_s_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_biclusters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_checkerboard", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_moons", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_moons_unbalanced", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_circles", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_circles_unbalanced", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_svmlight_format", - "imports": [ - "from bz2 import BZ2File", - "import gzip", - "from io import BytesIO", - "import numpy as np", - "import scipy.sparse as sp", - "import os", - "import shutil", - "from tempfile import NamedTemporaryFile", - "import pytest", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import fails_if_pypy", - "import sklearn", - "from sklearn.datasets import 
load_svmlight_file", - "from sklearn.datasets import load_svmlight_files", - "from sklearn.datasets import dump_svmlight_file" - ], - "classes": [], - "functions": [ - { - "name": "test_load_svmlight_file", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_svmlight_file_fd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_svmlight_file_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_svmlight_files", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_svmlight_file_n_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_compressed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_invalid_file", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_invalid_order_file", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_zero_based", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_zero_based_auto", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_with_qid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_large_qid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": "load large libsvm / svmlight file with qid attribute. Tests 64-bit query ID" - }, - { - "name": "test_load_invalid_file2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_not_a_filename", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_filename", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dump", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dump_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dump_concise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dump_comment", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dump_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dump_query_id", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_with_long_qid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_zeros", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_with_offsets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_offset_exhaustive_splits", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_with_offsets_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.decomposition.setup", - "imports": [ - "import os", - "import numpy", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition._base", - "imports": [ - "import numpy as np", - "from scipy import linalg", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils.validation import check_is_fitted", - "from abc import ABCMeta", - "from abc import abstractmethod" - ], - "classes": [ - { - "name": "_BasePCA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "get_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute data covariance with the generative model.\n\n``cov = components_.T * S**2 * components_ + sigma2 * eye(n_features)``\nwhere S**2 contains the explained variances, and sigma2 contains the\nnoise variances.\n\nReturns\n-------\ncov : array, shape=(n_features, n_features)\n Estimated covariance of data." 
- }, - { - "name": "get_precision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute data precision matrix with the generative model.\n\nEquals the inverse of the covariance but computed with\nthe matrix inversion lemma for efficiency.\n\nReturns\n-------\nprecision : array, shape=(n_features, n_features)\n Estimated precision of data." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Placeholder for fit. Subclasses should implement this method!\n\nFit the model with X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the instance itself." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data, where n_samples is the number of samples and n_features is the number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply dimensionality reduction to X.\n\nX is projected on the first principal components previously extracted\nfrom a training set.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n New data, where n_samples is the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)\n\nExamples\n--------\n\n>>> import numpy as np\n>>> from sklearn.decomposition import IncrementalPCA\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> ipca = IncrementalPCA(n_components=2, batch_size=3)\n>>> ipca.fit(X)\nIncrementalPCA(batch_size=3, n_components=2)\n>>> ipca.transform(X) # doctest: +SKIP" - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data, where n_samples is the number of samples and n_components is the number of components." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform data back to its original space.\n\nIn other words, return an input X_original whose transform would be X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_components)\n New data, where n_samples is the number of samples\n and n_components is the number of components.\n\nReturns\n-------\nX_original array-like, shape (n_samples, n_features)\n\nNotes\n-----\nIf whitening is enabled, inverse_transform will compute the\nexact inverse operation, which includes reversing whitening." - } - ], - "docstring": "Base class for PCA methods.\n\nWarning: This class should not be used directly.\nUse derived classes instead." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.decomposition._dict_learning", - "imports": [ - "import time", - "import sys", - "import itertools", - "from math import ceil", - "import numpy as np", - "from scipy import linalg", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import deprecated", - "from utils import check_array", - "from utils import check_random_state", - "from utils import gen_even_slices", - "from utils import gen_batches", - "from utils.extmath import randomized_svd", - "from utils.extmath import row_norms", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from linear_model import Lasso", - "from linear_model import orthogonal_mp_gram", - "from linear_model import LassoLars", - "from linear_model import Lars" - ], - "classes": [ - { - "name": "_BaseSparseCoding", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private method allowing to accomodate both DictionaryLearning and\nSparseCoder." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data to be transformed, must have the same number of features as the data used to train the model." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Encode the data as a sparse combination of the dictionary atoms.\n\nCoding method is determined by the object parameter\n`transform_algorithm`.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Test data to be transformed, must have the same number of\n features as the data used to train the model.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Transformed data." - } - ], - "docstring": "Base class from SparseCoder and DictionaryLearning algorithms." - }, - { - "name": "SparseCoder", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "dictionary", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dictionary atoms used for sparse coding. Lines are assumed to be normalized to unit norm." - }, - { - "name": "transform_algorithm", - "type": "Literal['lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold']", - "hasDefault": true, - "default": "'omp'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to transform the data: - `'lars'`: uses the least angle regression method (`linear_model.lars_path`); - `'lasso_lars'`: uses Lars to compute the Lasso solution; - `'lasso_cd'`: uses the coordinate descent method to compute the Lasso solution (linear_model.Lasso). `'lasso_lars'` will be faster if the estimated components are sparse; - `'omp'`: uses orthogonal matching pursuit to estimate the sparse solution; - `'threshold'`: squashes to zero all coefficients less than alpha from the projection ``dictionary * X'``." - }, - { - "name": "transform_n_nonzero_coefs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of nonzero coefficients to target in each column of the solution. 
This is only used by `algorithm='lars'` and `algorithm='omp'` and is overridden by `alpha` in the `omp` case. If `None`, then `transform_n_nonzero_coefs=int(n_features / 10)`." - }, - { - "name": "transform_alpha", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the penalty applied to the L1 norm. If `algorithm='threshold'`, `alpha` is the absolute value of the threshold below which coefficients will be squashed to zero. If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of the reconstruction error targeted. In this case, it overrides `n_nonzero_coefs`. If `None`, default to 1." - }, - { - "name": "split_sign", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to split the sparse feature vector into the concatenation of its negative part and its positive part. This can improve the performance of downstream classifiers." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "positive_code", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the code. .. versionadded:: 0.20" - }, - { - "name": "transform_max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform if `algorithm='lasso_cd'` or `lasso_lars`. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.\n\nParameters\n----------\nX : Ignored\n\ny : Ignored\n\nReturns\n-------\nself : object" - }, - { - "name": "components_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data to be transformed, must have the same number of features as the data used to train the model." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Encode the data as a sparse combination of the dictionary atoms.\n\nCoding method is determined by the object parameter\n`transform_algorithm`.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Test data to be transformed, must have the same number of\n features as the data used to train the model.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Transformed data." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "n_components_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Sparse coding\n\nFinds a sparse representation of data against a fixed, precomputed\ndictionary.\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\n X ~= code * dictionary\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndictionary : ndarray of shape (n_components, n_features)\n The dictionary atoms used for sparse coding. Lines are assumed to be\n normalized to unit norm.\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='omp'\n Algorithm used to transform the data:\n\n - `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n - `'lasso_lars'`: uses Lars to compute the Lasso solution;\n - `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (linear_model.Lasso). `'lasso_lars'` will be faster if\n the estimated components are sparse;\n - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n - `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``.\n\ntransform_n_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. 
If `None`, then\n `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, default to 1.\n\nsplit_sign : bool, default=False\n Whether to split the sparse feature vector into the concatenation of\n its negative part and its positive part. This can improve the\n performance of downstream classifiers.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `lasso_lars`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n The unchanged dictionary atoms.\n\n .. deprecated:: 0.24\n This attribute is deprecated in 0.24 and will be removed in\n 1.1 (renaming of 0.26). Use `dictionary` instead.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.decomposition import SparseCoder\n>>> X = np.array([[-1, -1, -1], [0, 0, 3]])\n>>> dictionary = np.array(\n... [[0, 1, 0],\n... [-1, -1, 2],\n... [1, 1, 1],\n... [0, 1, 1],\n... [0, 2, 1]],\n... dtype=np.float64\n... )\n>>> coder = SparseCoder(\n... dictionary=dictionary, transform_algorithm='lasso_lars',\n... transform_alpha=1e-10,\n... 
)\n>>> coder.transform(X)\narray([[ 0., 0., -1., 0., 0.],\n [ 0., 1., 1., 0., 0.]])\n\nSee Also\n--------\nDictionaryLearning\nMiniBatchDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA\nsparse_encode" - }, - { - "name": "DictionaryLearning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "n_features", - "limitation": null, - "ignored": false, - "docstring": "Number of dictionary elements to extract." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Sparsity controlling parameter." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-8", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for numerical error." - }, - { - "name": "fit_algorithm", - "type": "Literal['lars', 'cd']", - "hasDefault": true, - "default": "'lars'", - "limitation": null, - "ignored": false, - "docstring": "* `'lars'`: uses the least angle regression method to solve the lasso problem (:func:`~sklearn.linear_model.lars_path`); * `'cd'`: uses the coordinate descent method to compute the Lasso solution (:class:`~sklearn.linear_model.Lasso`). Lars will be faster if the estimated components are sparse. .. versionadded:: 0.17 *cd* coordinate descent method to improve speed." 
- }, - { - "name": "transform_algorithm", - "type": "Literal['lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold']", - "hasDefault": true, - "default": "'omp'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to transform the data: - `'lars'`: uses the least angle regression method (:func:`~sklearn.linear_model.lars_path`); - `'lasso_lars'`: uses Lars to compute the Lasso solution. - `'lasso_cd'`: uses the coordinate descent method to compute the Lasso solution (:class:`~sklearn.linear_model.Lasso`). `'lasso_lars'` will be faster if the estimated components are sparse. - `'omp'`: uses orthogonal matching pursuit to estimate the sparse solution. - `'threshold'`: squashes to zero all coefficients less than alpha from the projection ``dictionary * X'``. .. versionadded:: 0.17 *lasso_cd* coordinate descent method to improve speed." - }, - { - "name": "transform_n_nonzero_coefs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of nonzero coefficients to target in each column of the solution. This is only used by `algorithm='lars'` and `algorithm='omp'` and is overridden by `alpha` in the `omp` case. If `None`, then `transform_n_nonzero_coefs=int(n_features / 10)`." - }, - { - "name": "transform_alpha", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the penalty applied to the L1 norm. If `algorithm='threshold'`, `alpha` is the absolute value of the threshold below which coefficients will be squashed to zero. If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of the reconstruction error targeted. In this case, it overrides `n_nonzero_coefs`. 
If `None`, default to 1.0" - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "code_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial value for the code, for warm restart." - }, - { - "name": "dict_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial values for the dictionary, for warm restart." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "To control the verbosity of the procedure." - }, - { - "name": "split_sign", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to split the sparse feature vector into the concatenation of its negative part and its positive part. This can improve the performance of downstream classifiers." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for initializing the dictionary when ``dict_init`` is not specified, randomly shuffling the data when ``shuffle`` is set to ``True``, and updating the dictionary. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "positive_code", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the code. .. 
versionadded:: 0.20" - }, - { - "name": "positive_dict", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the dictionary .. versionadded:: 0.20" - }, - { - "name": "transform_max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform if `algorithm='lasso_cd'` or `'lasso_lars'`. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where `n_samples` in the number of samples and `n_features` is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where `n_samples` in the number of samples\n and `n_features` is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the object itself." 
- } - ], - "docstring": "Dictionary learning\n\nFinds a dictionary (a set of atoms) that can best be used to represent data\nusing a sparse code.\n\nSolves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=n_features\n Number of dictionary elements to extract.\n\nalpha : float, default=1.0\n Sparsity controlling parameter.\n\nmax_iter : int, default=1000\n Maximum number of iterations to perform.\n\ntol : float, default=1e-8\n Tolerance for numerical error.\n\nfit_algorithm : {'lars', 'cd'}, default='lars'\n * `'lars'`: uses the least angle regression method to solve the lasso\n problem (:func:`~sklearn.linear_model.lars_path`);\n * `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (:class:`~sklearn.linear_model.Lasso`). Lars will be\n faster if the estimated components are sparse.\n\n .. versionadded:: 0.17\n *cd* coordinate descent method to improve speed.\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='omp'\n Algorithm used to transform the data:\n\n - `'lars'`: uses the least angle regression method\n (:func:`~sklearn.linear_model.lars_path`);\n - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n - `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (:class:`~sklearn.linear_model.Lasso`). `'lasso_lars'`\n will be faster if the estimated components are sparse.\n - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution.\n - `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``.\n\n .. versionadded:: 0.17\n *lasso_cd* coordinate descent method to improve speed.\n\ntransform_n_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. 
This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. If `None`, then\n `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, default to 1.0\n\nn_jobs : int or None, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\ncode_init : ndarray of shape (n_samples, n_components), default=None\n Initial value for the code, for warm restart.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n Initial values for the dictionary, for warm restart.\n\nverbose : bool, default=False\n To control the verbosity of the procedure.\n\nsplit_sign : bool, default=False\n Whether to split the sparse feature vector into the concatenation of\n its negative part and its positive part. This can improve the\n performance of downstream classifiers.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for initializing the dictionary when ``dict_init`` is not\n specified, randomly shuffling the data when ``shuffle`` is set to\n ``True``, and updating the dictionary. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\npositive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary\n\n .. 
versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n dictionary atoms extracted from the data\n\nerror_ : array\n vector of errors at each iteration\n\nn_iter_ : int\n Number of iterations run.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_sparse_coded_signal\n>>> from sklearn.decomposition import DictionaryLearning\n>>> X, dictionary, code = make_sparse_coded_signal(\n... n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n... random_state=42,\n... )\n>>> dict_learner = DictionaryLearning(\n... n_components=15, transform_algorithm='lasso_lars', random_state=42,\n... )\n>>> X_transformed = dict_learner.fit_transform(X)\n\nWe can check the level of sparsity of `X_transformed`:\n\n>>> np.mean(X_transformed == 0)\n0.88...\n\nWe can compare the average squared euclidean norm of the reconstruction\nerror of the sparse coded signal relative to the squared euclidean norm of\nthe original signal:\n\n>>> X_hat = X_transformed @ dict_learner.components_\n>>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n0.07...\n\nNotes\n-----\n**References:**\n\nJ. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009: Online dictionary learning\nfor sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\nSee Also\n--------\nSparseCoder\nMiniBatchDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA" - }, - { - "name": "MiniBatchDictionaryLearning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of dictionary elements to extract." 
- }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Sparsity controlling parameter." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Total number of iterations to perform." - }, - { - "name": "fit_algorithm", - "type": "Literal['lars', 'cd']", - "hasDefault": true, - "default": "'lars'", - "limitation": null, - "ignored": false, - "docstring": "The algorithm used: - `'lars'`: uses the least angle regression method to solve the lasso problem (`linear_model.lars_path`) - `'cd'`: uses the coordinate descent method to compute the Lasso solution (`linear_model.Lasso`). Lars will be faster if the estimated components are sparse." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of samples in each mini-batch." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle the samples before forming batches." 
- }, - { - "name": "dict_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "initial value of the dictionary for warm restart scenarios" - }, - { - "name": "transform_algorithm", - "type": "Literal['lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold']", - "hasDefault": true, - "default": "'omp'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to transform the data: - `'lars'`: uses the least angle regression method (`linear_model.lars_path`); - `'lasso_lars'`: uses Lars to compute the Lasso solution. - `'lasso_cd'`: uses the coordinate descent method to compute the Lasso solution (`linear_model.Lasso`). `'lasso_lars'` will be faster if the estimated components are sparse. - `'omp'`: uses orthogonal matching pursuit to estimate the sparse solution. - `'threshold'`: squashes to zero all coefficients less than alpha from the projection ``dictionary * X'``." - }, - { - "name": "transform_n_nonzero_coefs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of nonzero coefficients to target in each column of the solution. This is only used by `algorithm='lars'` and `algorithm='omp'` and is overridden by `alpha` in the `omp` case. If `None`, then `transform_n_nonzero_coefs=int(n_features / 10)`." - }, - { - "name": "transform_alpha", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the penalty applied to the L1 norm. If `algorithm='threshold'`, `alpha` is the absolute value of the threshold below which coefficients will be squashed to zero. If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of the reconstruction error targeted. In this case, it overrides `n_nonzero_coefs`. If `None`, default to 1." 
- }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "To control the verbosity of the procedure." - }, - { - "name": "split_sign", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to split the sparse feature vector into the concatenation of its negative part and its positive part. This can improve the performance of downstream classifiers." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for initializing the dictionary when ``dict_init`` is not specified, randomly shuffling the data when ``shuffle`` is set to ``True``, and updating the dictionary. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "positive_code", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the code. .. versionadded:: 0.20" - }, - { - "name": "positive_dict", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the dictionary. .. versionadded:: 0.20" - }, - { - "name": "transform_max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform if `algorithm='lasso_cd'` or `'lasso_lars'`. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - }, - { - "name": "iter_offset", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of iteration on data batches that has been performed before this call to partial_fit. This is optional: if no number is passed, the memory of the object is used." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Updates the model using the data in X as a mini-batch.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\niter_offset : int, default=None\n The number of iteration on data batches that has been\n performed before this call to partial_fit. This is optional:\n if no number is passed, the memory of the object is\n used.\n\nReturns\n-------\nself : object\n Returns the instance itself." - } - ], - "docstring": "Mini-batch dictionary learning\n\nFinds a dictionary (a set of atoms) that can best be used to represent data\nusing a sparse code.\n\nSolves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of dictionary elements to extract.\n\nalpha : float, default=1\n Sparsity controlling parameter.\n\nn_iter : int, default=1000\n Total number of iterations to perform.\n\nfit_algorithm : {'lars', 'cd'}, default='lars'\n The algorithm used:\n\n - `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`)\n - `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nbatch_size : int, default=3\n Number of samples in each mini-batch.\n\nshuffle : bool, default=True\n Whether to shuffle the samples before forming batches.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n initial value of the dictionary for warm restart scenarios\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='omp'\n Algorithm used to transform the data:\n\n - `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n - `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). `'lasso_lars'` will be faster\n if the estimated components are sparse.\n - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution.\n - `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``.\n\ntransform_n_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. If `None`, then\n `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, default to 1.\n\nverbose : bool, default=False\n To control the verbosity of the procedure.\n\nsplit_sign : bool, default=False\n Whether to split the sparse feature vector into the concatenation of\n its negative part and its positive part. 
This can improve the\n performance of downstream classifiers.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for initializing the dictionary when ``dict_init`` is not\n specified, randomly shuffling the data when ``shuffle`` is set to\n ``True``, and updating the dictionary. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\npositive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Components extracted from the data.\n\ninner_stats_ : tuple of (A, B) ndarrays\n Internal sufficient statistics that are kept by the algorithm.\n Keeping them is useful in online settings, to avoid losing the\n history of the evolution, but they shouldn't have any use for the\n end user.\n `A` `(n_components, n_components)` is the dictionary covariance matrix.\n `B` `(n_features, n_components)` is the data approximation matrix.\n\nn_iter_ : int\n Number of iterations run.\n\niter_offset_ : int\n The number of iteration on data batches that has been\n performed before.\n\nrandom_state_ : RandomState instance\n RandomState instance that is generated either from a seed, the random\n number generattor or by `np.random`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_sparse_coded_signal\n>>> from sklearn.decomposition import MiniBatchDictionaryLearning\n>>> X, dictionary, code = make_sparse_coded_signal(\n... n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n... random_state=42)\n>>> dict_learner = MiniBatchDictionaryLearning(\n... 
n_components=15, transform_algorithm='lasso_lars', random_state=42,\n... )\n>>> X_transformed = dict_learner.fit_transform(X)\n\nWe can check the level of sparsity of `X_transformed`:\n\n>>> np.mean(X_transformed == 0)\n0.87...\n\nWe can compare the average squared euclidean norm of the reconstruction\nerror of the sparse coded signal relative to the squared euclidean norm of\nthe original signal:\n\n>>> X_hat = X_transformed @ dict_learner.components_\n>>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n0.10...\n\nNotes\n-----\n**References:**\n\nJ. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009: Online dictionary learning\nfor sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\nSee Also\n--------\nSparseCoder\nDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA" - } - ], - "functions": [ - { - "name": "_check_positive_coding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sparse_encode", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix." - }, - { - "name": "dictionary", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dictionary matrix against which to solve the sparse coding of the data. Some of the algorithms assume normalized rows." - }, - { - "name": "gram", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed Gram matrix, `dictionary * dictionary'` gram can be `None` if method is 'threshold'." - }, - { - "name": "cov", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed covariance, `dictionary * X'`." 
- }, - { - "name": "algorithm", - "type": "Literal['lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The algorithm used: * `'lars'`: uses the least angle regression method (`linear_model.lars_path`); * `'lasso_lars'`: uses Lars to compute the Lasso solution; * `'lasso_cd'`: uses the coordinate descent method to compute the Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if the estimated components are sparse; * `'omp'`: uses orthogonal matching pursuit to estimate the sparse solution; * `'threshold'`: squashes to zero all coefficients less than regularization from the projection `dictionary * data'`." - }, - { - "name": "regularization", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The regularization parameter. It corresponds to alpha when algorithm is `'lasso_lars'`, `'lasso_cd'` or `'threshold'`. Otherwise it corresponds to `n_nonzero_coefs`." - }, - { - "name": "init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initialization value of the sparse code. Only used if `algorithm='lasso_cd'`." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform if `algorithm='lasso_cd'` or `'lasso_lars'`." - }, - { - "name": "copy_cov", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy the precomputed covariance matrix; if `False`, it may be overwritten." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If `False`, the input arrays `X` and dictionary will not be checked." 
- }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity; the higher, the more messages." - }, - { - "name": "positive: bool", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce a positivity constraint on the sparse code. .. versionadded:: 0.20" - }, - { - "name": "default=False", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce a positivity constraint on the sparse code. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generic sparse coding.\n\nEach column of the result is the solution to a Lasso problem.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data matrix.\n\ndictionary : ndarray of shape (n_components, n_features)\n The dictionary matrix against which to solve the sparse coding of\n the data. Some of the algorithms assume normalized rows.\n\ngram : ndarray of shape (n_components, n_components) or None\n Precomputed Gram matrix, `dictionary * dictionary'`\n gram can be `None` if method is 'threshold'.\n\ncov : ndarray of shape (n_components, n_samples), default=None\n Precomputed covariance, `dictionary * X'`.\n\nalgorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='lasso_lars'\n The algorithm used:\n\n * `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n * `'lasso_lars'`: uses Lars to compute the Lasso solution;\n * `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). 
lasso_lars will be faster if\n the estimated components are sparse;\n * `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n * `'threshold'`: squashes to zero all coefficients less than\n regularization from the projection `dictionary * data'`.\n\nregularization : int or float, default=None\n The regularization parameter. It corresponds to alpha when\n algorithm is `'lasso_lars'`, `'lasso_cd'` or `'threshold'`.\n Otherwise it corresponds to `n_nonzero_coefs`.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n Initialization value of the sparse code. Only used if\n `algorithm='lasso_cd'`.\n\nmax_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\ncopy_cov : bool, default=True\n Whether to copy the precomputed covariance matrix; if `False`, it may\n be overwritten.\n\ncheck_input : bool, default=True\n If `False`, the input arrays `X` and dictionary will not be checked.\n\nverbose : int, default=0\n Controls the verbosity; the higher, the more messages.\n\npositive: bool, default=False\n Whether to enforce a positivity constraint on the sparse code.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ncode : ndarray of shape (n_components, n_features)\n The sparse codes.\n\nSee Also\n--------\nsklearn.linear_model.lars_path\nsklearn.linear_model.orthogonal_mp\nsklearn.linear_model.Lasso\nSparseCoder" - }, - { - "name": "sparse_encode", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix." - }, - { - "name": "dictionary", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dictionary matrix against which to solve the sparse coding of the data. Some of the algorithms assume normalized rows for meaningful output." 
- }, - { - "name": "gram", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed Gram matrix, `dictionary * dictionary'`." - }, - { - "name": "cov", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed covariance, `dictionary' * X`." - }, - { - "name": "algorithm", - "type": "Literal['lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The algorithm used: * `'lars'`: uses the least angle regression method (`linear_model.lars_path`); * `'lasso_lars'`: uses Lars to compute the Lasso solution; * `'lasso_cd'`: uses the coordinate descent method to compute the Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if the estimated components are sparse; * `'omp'`: uses orthogonal matching pursuit to estimate the sparse solution; * `'threshold'`: squashes to zero all coefficients less than regularization from the projection `dictionary * data'`." - }, - { - "name": "n_nonzero_coefs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of nonzero coefficients to target in each column of the solution. This is only used by `algorithm='lars'` and `algorithm='omp'` and is overridden by `alpha` in the `omp` case. If `None`, then `n_nonzero_coefs=int(n_features / 10)`." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the penalty applied to the L1 norm. If `algorithm='threshold'`, `alpha` is the absolute value of the threshold below which coefficients will be squashed to zero. 
If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of the reconstruction error targeted. In this case, it overrides `n_nonzero_coefs`. If `None`, default to 1." - }, - { - "name": "copy_cov", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy the precomputed covariance matrix; if `False`, it may be overwritten." - }, - { - "name": "init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initialization value of the sparse codes. Only used if `algorithm='lasso_cd'`." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform if `algorithm='lasso_cd'` or `'lasso_lars'`." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If `False`, the input arrays X and dictionary will not be checked." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity; the higher, the more messages." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the encoding. .. 
versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sparse coding\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\n X ~= code * dictionary\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data matrix.\n\ndictionary : ndarray of shape (n_components, n_features)\n The dictionary matrix against which to solve the sparse coding of\n the data. Some of the algorithms assume normalized rows for meaningful\n output.\n\ngram : ndarray of shape (n_components, n_components), default=None\n Precomputed Gram matrix, `dictionary * dictionary'`.\n\ncov : ndarray of shape (n_components, n_samples), default=None\n Precomputed covariance, `dictionary' * X`.\n\nalgorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='lasso_lars'\n The algorithm used:\n\n * `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n * `'lasso_lars'`: uses Lars to compute the Lasso solution;\n * `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if\n the estimated components are sparse;\n * `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n * `'threshold'`: squashes to zero all coefficients less than\n regularization from the projection `dictionary * data'`.\n\nn_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. 
If `None`, then\n `n_nonzero_coefs=int(n_features / 10)`.\n\nalpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, default to 1.\n\ncopy_cov : bool, default=True\n Whether to copy the precomputed covariance matrix; if `False`, it may\n be overwritten.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n Initialization value of the sparse codes. Only used if\n `algorithm='lasso_cd'`.\n\nmax_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\ncheck_input : bool, default=True\n If `False`, the input arrays X and dictionary will not be checked.\n\nverbose : int, default=0\n Controls the verbosity; the higher, the more messages.\n\npositive : bool, default=False\n Whether to enforce positivity when finding the encoding.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ncode : ndarray of shape (n_samples, n_components)\n The sparse codes\n\nSee Also\n--------\nsklearn.linear_model.lars_path\nsklearn.linear_model.orthogonal_mp\nsklearn.linear_model.Lasso\nSparseCoder" - }, - { - "name": "_update_dict", - "decorators": [], - "parameters": [ - { - "name": "dictionary", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Value of the dictionary at the previous iteration." 
- }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix." - }, - { - "name": "code", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sparse coding of the data against which to optimize the dictionary." - }, - { - "name": "verbose: bool", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Degree of output the procedure will print." - }, - { - "name": "default=False", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Degree of output the procedure will print." - }, - { - "name": "return_r2", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to compute and return the residual sum of squares corresponding to the computed solution." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for randomly initializing the dictionary. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the dictionary. .. 
versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update the dense dictionary factor in place.\n\nParameters\n----------\ndictionary : ndarray of shape (n_features, n_components)\n Value of the dictionary at the previous iteration.\n\nY : ndarray of shape (n_features, n_samples)\n Data matrix.\n\ncode : ndarray of shape (n_components, n_samples)\n Sparse coding of the data against which to optimize the dictionary.\n\nverbose: bool, default=False\n Degree of output the procedure will print.\n\nreturn_r2 : bool, default=False\n Whether to compute and return the residual sum of squares corresponding\n to the computed solution.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for randomly initializing the dictionary. Pass an int for\n reproducible results across multiple function calls.\n See :term:`Glossary `.\n\npositive : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ndictionary : ndarray of shape (n_features, n_components)\n Updated dictionary." - }, - { - "name": "dict_learning", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of dictionary atoms to extract." - }, - { - "name": "alpha", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sparsity controlling parameter." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform." 
- }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-8", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for the stopping condition." - }, - { - "name": "method", - "type": "Literal['lars', 'cd']", - "hasDefault": true, - "default": "'lars'", - "limitation": null, - "ignored": false, - "docstring": "The method used: * `'lars'`: uses the least angle regression method to solve the lasso problem (`linear_model.lars_path`); * `'cd'`: uses the coordinate descent method to compute the Lasso solution (`linear_model.Lasso`). Lars will be faster if the estimated components are sparse." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "dict_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial value for the dictionary for warm restart scenarios." - }, - { - "name": "code_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial value for the sparse code for warm restart scenarios." - }, - { - "name": "callback", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Callable that gets invoked every five iterations" - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "To control the verbosity of the procedure." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for randomly initializing the dictionary. 
Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - }, - { - "name": "positive_dict", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the dictionary. .. versionadded:: 0.20" - }, - { - "name": "positive_code", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the code. .. versionadded:: 0.20" - }, - { - "name": "method_max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Solves a dictionary learning matrix factorization problem.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n (U^*, V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data matrix.\n\nn_components : int\n Number of dictionary atoms to extract.\n\nalpha : int\n Sparsity controlling parameter.\n\nmax_iter : int, default=100\n Maximum number of iterations to perform.\n\ntol : float, default=1e-8\n Tolerance for the stopping condition.\n\nmethod : {'lars', 'cd'}, default='lars'\n The method used:\n\n * `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`);\n * `'cd'`: uses the 
coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n Initial value for the dictionary for warm restart scenarios.\n\ncode_init : ndarray of shape (n_samples, n_components), default=None\n Initial value for the sparse code for warm restart scenarios.\n\ncallback : callable, default=None\n Callable that gets invoked every five iterations\n\nverbose : bool, default=False\n To control the verbosity of the procedure.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for randomly initializing the dictionary. Pass an int for\n reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\npositive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\nmethod_max_iter : int, default=1000\n Maximum number of iterations to perform.\n\n .. versionadded:: 0.22\n\nReturns\n-------\ncode : ndarray of shape (n_samples, n_components)\n The sparse code factor in the matrix factorization.\n\ndictionary : ndarray of shape (n_components, n_features),\n The dictionary factor in the matrix factorization.\n\nerrors : array\n Vector of errors at each iteration.\n\nn_iter : int\n Number of iterations run. 
Returned only if `return_n_iter` is\n set to True.\n\nSee Also\n--------\ndict_learning_online\nDictionaryLearning\nMiniBatchDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA" - }, - { - "name": "dict_learning_online", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of dictionary atoms to extract." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Sparsity controlling parameter." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of mini-batch iterations to perform." - }, - { - "name": "return_code", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to also return the code U or just the dictionary `V`." - }, - { - "name": "dict_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial value for the dictionary for warm restart scenarios." - }, - { - "name": "callback", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "callable that gets invoked every five iterations." - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The number of samples to take in each batch." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "To control the verbosity of the procedure." 
- }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle the data before splitting it in batches." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "method", - "type": "Literal['lars', 'cd']", - "hasDefault": true, - "default": "'lars'", - "limitation": null, - "ignored": false, - "docstring": "* `'lars'`: uses the least angle regression method to solve the lasso problem (`linear_model.lars_path`); * `'cd'`: uses the coordinate descent method to compute the Lasso solution (`linear_model.Lasso`). Lars will be faster if the estimated components are sparse." - }, - { - "name": "iter_offset", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Number of previous iterations completed on the dictionary used for initialization." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for initializing the dictionary when ``dict_init`` is not specified, randomly shuffling the data when ``shuffle`` is set to ``True``, and updating the dictionary. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "return_inner_stats", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Return the inner statistics A (dictionary covariance) and B (data approximation). Useful to restart the algorithm in an online setting. If `return_inner_stats` is `True`, `return_code` is ignored." 
- }, - { - "name": "inner_stats", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Inner sufficient statistics that are kept by the algorithm. Passing them at initialization is useful in online settings, to avoid losing the history of the evolution. `A` `(n_components, n_components)` is the dictionary covariance matrix. `B` `(n_features, n_components)` is the data approximation matrix." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - }, - { - "name": "positive_dict", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the dictionary. .. versionadded:: 0.20" - }, - { - "name": "positive_code", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the code. .. versionadded:: 0.20" - }, - { - "name": "method_max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform when solving the lasso problem. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Solves a dictionary learning matrix factorization problem online.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n (U^*, V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. 
This is\naccomplished by repeatedly iterating over mini-batches by slicing\nthe input data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data matrix.\n\nn_components : int, default=2\n Number of dictionary atoms to extract.\n\nalpha : float, default=1\n Sparsity controlling parameter.\n\nn_iter : int, default=100\n Number of mini-batch iterations to perform.\n\nreturn_code : bool, default=True\n Whether to also return the code U or just the dictionary `V`.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n Initial value for the dictionary for warm restart scenarios.\n\ncallback : callable, default=None\n callable that gets invoked every five iterations.\n\nbatch_size : int, default=3\n The number of samples to take in each batch.\n\nverbose : bool, default=False\n To control the verbosity of the procedure.\n\nshuffle : bool, default=True\n Whether to shuffle the data before splitting it in batches.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nmethod : {'lars', 'cd'}, default='lars'\n * `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`);\n * `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse.\n\niter_offset : int, default=0\n Number of previous iterations completed on the dictionary used for\n initialization.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for initializing the dictionary when ``dict_init`` is not\n specified, randomly shuffling the data when ``shuffle`` is set to\n ``True``, and updating the dictionary. 
Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\nreturn_inner_stats : bool, default=False\n Return the inner statistics A (dictionary covariance) and B\n (data approximation). Useful to restart the algorithm in an\n online setting. If `return_inner_stats` is `True`, `return_code` is\n ignored.\n\ninner_stats : tuple of (A, B) ndarrays, default=None\n Inner sufficient statistics that are kept by the algorithm.\n Passing them at initialization is useful in online settings, to\n avoid losing the history of the evolution.\n `A` `(n_components, n_components)` is the dictionary covariance matrix.\n `B` `(n_features, n_components)` is the data approximation matrix.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\npositive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\nmethod_max_iter : int, default=1000\n Maximum number of iterations to perform when solving the lasso problem.\n\n .. versionadded:: 0.22\n\nReturns\n-------\ncode : ndarray of shape (n_samples, n_components),\n The sparse code (only returned if `return_code=True`).\n\ndictionary : ndarray of shape (n_components, n_features),\n The solutions to the dictionary learning problem.\n\nn_iter : int\n Number of iterations run. 
Returned only if `return_n_iter` is\n set to `True`.\n\nSee Also\n--------\ndict_learning\nDictionaryLearning\nMiniBatchDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA" - } - ] - }, - { - "name": "sklearn.decomposition._factor_analysis", - "imports": [ - "import warnings", - "from math import sqrt", - "from math import log", - "import numpy as np", - "from scipy import linalg", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_random_state", - "from utils.extmath import fast_logdet", - "from utils.extmath import randomized_svd", - "from utils.extmath import squared_norm", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from exceptions import ConvergenceWarning" - ], - "classes": [ - { - "name": "FactorAnalysis", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of latent space, the number of components of ``X`` that are obtained after ``transform``. If None, n_components is set to the number of features." - }, - { - "name": "tol", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Stopping tolerance for log-likelihood increase." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to make a copy of X. If ``False``, the input X gets overwritten during fitting." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations." 
- }, - { - "name": "noise_variance_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial guess of the noise variance for each feature. If None, it defaults to np.ones(n_features)." - }, - { - "name": "svd_method", - "type": "Literal['lapack', 'randomized']", - "hasDefault": true, - "default": "'randomized'", - "limitation": null, - "ignored": false, - "docstring": "Which SVD method to use. If 'lapack' use standard SVD from scipy.linalg, if 'randomized' use fast ``randomized_svd`` function. Defaults to 'randomized'. For most applications 'randomized' will be sufficiently precise while providing significant speed gains. Accuracy can also be improved by setting higher values for `iterated_power`. If this is not sufficient, for maximum precision you should choose 'lapack'." - }, - { - "name": "iterated_power", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations for the power method. 3 by default. Only used if ``svd_method`` equals 'randomized'." - }, - { - "name": "rotation", - "type": "Literal['varimax', 'quartimax']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, apply the indicated rotation. Currently, varimax and quartimax are implemented. See `\"The varimax criterion for analytic rotation in factor analysis\" `_ H. F. Kaiser, 1958. .. versionadded:: 0.24" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Only used when ``svd_method`` equals 'randomized'. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the FactorAnalysis model to X using SVD based approach\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : Ignored\n\nReturns\n-------\nself" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply dimensionality reduction to X using the model.\n\nCompute the expected mean of the latent variables.\nSee Barber, 21.2.33 (or Bishop, 12.66).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n The latent variables of X." - }, - { - "name": "get_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute data covariance with the FactorAnalysis model.\n\n``cov = components_.T * components_ + diag(noise_variance)``\n\nReturns\n-------\ncov : ndarray of shape (n_features, n_features)\n Estimated covariance of data." 
- }, - { - "name": "get_precision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute data precision matrix with the FactorAnalysis model.\n\nReturns\n-------\nprecision : ndarray of shape (n_features, n_features)\n Estimated precision of data." - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the log-likelihood of each sample\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The data\n\nReturns\n-------\nll : ndarray of shape (n_samples,)\n Log-likelihood of each sample under the current model" - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the average log-likelihood of the samples\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The data\n\ny : Ignored\n\nReturns\n-------\nll : float\n Average log-likelihood of the samples under the current model" - }, - { - "name": "_rotate", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Rotate the factor analysis solution." 
- } - ], - "docstring": "Factor Analysis (FA).\n\nA simple linear generative model with Gaussian latent variables.\n\nThe observations are assumed to be caused by a linear transformation of\nlower dimensional latent factors and added Gaussian noise.\nWithout loss of generality the factors are distributed according to a\nGaussian with zero mean and unit covariance. The noise is also zero mean\nand has an arbitrary diagonal covariance matrix.\n\nIf we would restrict the model further, by assuming that the Gaussian\nnoise is even isotropic (all diagonal entries are the same) we would obtain\n:class:`PPCA`.\n\nFactorAnalysis performs a maximum likelihood estimate of the so-called\n`loading` matrix, the transformation of the latent variables to the\nobserved ones, using SVD based approach.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nn_components : int, default=None\n Dimensionality of latent space, the number of components\n of ``X`` that are obtained after ``transform``.\n If None, n_components is set to the number of features.\n\ntol : float, defaul=1e-2\n Stopping tolerance for log-likelihood increase.\n\ncopy : bool, default=True\n Whether to make a copy of X. If ``False``, the input X gets overwritten\n during fitting.\n\nmax_iter : int, default=1000\n Maximum number of iterations.\n\nnoise_variance_init : ndarray of shape (n_features,), default=None\n The initial guess of the noise variance for each feature.\n If None, it defaults to np.ones(n_features).\n\nsvd_method : {'lapack', 'randomized'}, default='randomized'\n Which SVD method to use. If 'lapack' use standard SVD from\n scipy.linalg, if 'randomized' use fast ``randomized_svd`` function.\n Defaults to 'randomized'. For most applications 'randomized' will\n be sufficiently precise while providing significant speed gains.\n Accuracy can also be improved by setting higher values for\n `iterated_power`. 
If this is not sufficient, for maximum precision\n you should choose 'lapack'.\n\niterated_power : int, default=3\n Number of iterations for the power method. 3 by default. Only used\n if ``svd_method`` equals 'randomized'.\n\nrotation : {'varimax', 'quartimax'}, default=None\n If not None, apply the indicated rotation. Currently, varimax and\n quartimax are implemented. See\n `\"The varimax criterion for analytic rotation in factor analysis\"\n `_\n H. F. Kaiser, 1958.\n\n .. versionadded:: 0.24\n\nrandom_state : int or RandomState instance, default=0\n Only used when ``svd_method`` equals 'randomized'. Pass an int for\n reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Components with maximum variance.\n\nloglike_ : list of shape (n_iterations,)\n The log likelihood at each iteration.\n\nnoise_variance_ : ndarray of shape (n_features,)\n The estimated noise variance for each feature.\n\nn_iter_ : int\n Number of iterations run.\n\nmean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, estimated from the training set.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import FactorAnalysis\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = FactorAnalysis(n_components=7, random_state=0)\n>>> X_transformed = transformer.fit_transform(X)\n>>> X_transformed.shape\n(1797, 7)\n\nReferences\n----------\n- David Barber, Bayesian Reasoning and Machine Learning,\n Algorithm 21.1.\n\n- Christopher M. 
Bishop: Pattern Recognition and Machine Learning,\n Chapter 12.2.4.\n\nSee Also\n--------\nPCA: Principal component analysis is also a latent linear variable model\n which however assumes equal noise variance for each feature.\n This extra assumption makes probabilistic PCA faster as it can be\n computed in closed form.\nFastICA: Independent component analysis, a latent variable model with\n non-Gaussian latent variables." - } - ], - "functions": [ - { - "name": "_ortho_rotation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return rotated components." - } - ] - }, - { - "name": "sklearn.decomposition._fastica", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy import linalg", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from exceptions import ConvergenceWarning", - "from utils import check_array", - "from utils import as_float_array", - "from utils import check_random_state", - "from utils.validation import check_is_fitted", - "from utils.validation import FLOAT_DTYPES", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "FastICA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components to use. If None is passed, all are used." - }, - { - "name": "algorithm", - "type": "Literal['parallel', 'deflation']", - "hasDefault": true, - "default": "'parallel'", - "limitation": null, - "ignored": false, - "docstring": "Apply parallel or deflational algorithm for FastICA." 
- }, - { - "name": "whiten", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If whiten is false, the data is already considered to be whitened, and no whitening is performed." - }, - { - "name": "fun", - "type": "Literal['logcosh', 'exp', 'cube']", - "hasDefault": true, - "default": "'logcosh'", - "limitation": null, - "ignored": false, - "docstring": "The functional form of the G function used in the approximation to neg-entropy. Could be either 'logcosh', 'exp', or 'cube'. You can also provide your own function. It should return a tuple containing the value of the function, and of its derivative, in the point. Example:: def my_g(x): return x ** 3, (3 * x ** 2).mean(axis=-1)" - }, - { - "name": "fun_args", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Arguments to send to the functional form. If empty and if fun='logcosh', fun_args will take value {'alpha' : 1.0}." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations during fit." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance on update at each iteration." - }, - { - "name": "w_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The mixing matrix to be used to initialize the algorithm." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to initialize ``w_init`` when not specified, with a normal distribution. Pass an int, for reproducible results across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "compute_sources", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If False, sources are not computes but only the rotation matrix. This can save memory when working with big data. Defaults to False." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ncompute_sources : bool, default=False\n If False, sources are not computes but only the rotation matrix.\n This can save memory when working with big data. Defaults to False.\n\nReturns\n-------\n X_new : ndarray of shape (n_samples, n_components)" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model and recover the sources from X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to transform, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, data passed to fit can be overwritten. Defaults to True." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Recover the sources from X (apply the unmixing matrix).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to transform, where n_samples is the number of samples\n and n_features is the number of features.\n\ncopy : bool, default=True\n If False, data passed to fit can be overwritten. Defaults to True.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)" - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sources, where n_samples is the number of samples and n_components is the number of components." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, data passed to fit are overwritten. Defaults to True." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform the sources back to the mixed data (apply mixing matrix).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_components)\n Sources, where n_samples is the number of samples\n and n_components is the number of components.\ncopy : bool, default=True\n If False, data passed to fit are overwritten. Defaults to True.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_features)" - } - ], - "docstring": "FastICA: a fast algorithm for Independent Component Analysis.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of components to use. 
If None is passed, all are used.\n\nalgorithm : {'parallel', 'deflation'}, default='parallel'\n Apply parallel or deflational algorithm for FastICA.\n\nwhiten : bool, default=True\n If whiten is false, the data is already considered to be\n whitened, and no whitening is performed.\n\nfun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'\n The functional form of the G function used in the\n approximation to neg-entropy. Could be either 'logcosh', 'exp',\n or 'cube'.\n You can also provide your own function. It should return a tuple\n containing the value of the function, and of its derivative, in the\n point. Example::\n\n def my_g(x):\n return x ** 3, (3 * x ** 2).mean(axis=-1)\n\nfun_args : dict, default=None\n Arguments to send to the functional form.\n If empty and if fun='logcosh', fun_args will take value\n {'alpha' : 1.0}.\n\nmax_iter : int, default=200\n Maximum number of iterations during fit.\n\ntol : float, default=1e-4\n Tolerance on update at each iteration.\n\nw_init : ndarray of shape (n_components, n_components), default=None\n The mixing matrix to be used to initialize the algorithm.\n\nrandom_state : int, RandomState instance or None, default=None\n Used to initialize ``w_init`` when not specified, with a\n normal distribution. Pass an int, for reproducible results\n across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n The linear operator to apply to the data to get the independent\n sources. This is equal to the unmixing matrix when ``whiten`` is\n False, and equal to ``np.dot(unmixing_matrix, self.whitening_)`` when\n ``whiten`` is True.\n\nmixing_ : ndarray of shape (n_features, n_components)\n The pseudo-inverse of ``components_``. It is the linear operator\n that maps independent sources to the data.\n\nmean_ : ndarray of shape(n_features,)\n The mean over features. 
Only set if `self.whiten` is True.\n\nn_iter_ : int\n If the algorithm is \"deflation\", n_iter is the\n maximum number of iterations run across all components. Else\n they are just the number of iterations taken to converge.\n\nwhitening_ : ndarray of shape (n_components, n_features)\n Only set if whiten is 'True'. This is the pre-whitening matrix\n that projects data onto the first `n_components` principal components.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import FastICA\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = FastICA(n_components=7,\n... random_state=0)\n>>> X_transformed = transformer.fit_transform(X)\n>>> X_transformed.shape\n(1797, 7)\n\nNotes\n-----\nImplementation based on\n*A. Hyvarinen and E. Oja, Independent Component Analysis:\nAlgorithms and Applications, Neural Networks, 13(4-5), 2000,\npp. 411-430*" - } - ], - "functions": [ - { - "name": "_gs_decorrelation", - "decorators": [], - "parameters": [ - { - "name": "w", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array to be orthogonalized" - }, - { - "name": "W", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Null space definition" - }, - { - "name": "j", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The no of (from the first) rows of Null space W wrt which w is orthogonalized." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Orthonormalize w wrt the first j rows of W.\n\nParameters\n----------\nw : ndarray of shape (n,)\n Array to be orthogonalized\n\nW : ndarray of shape (p, n)\n Null space definition\n\nj : int < p\n The no of (from the first) rows of Null space W wrt which w is\n orthogonalized.\n\nNotes\n-----\nAssumes that W is orthogonal\nw changed in place" - }, - { - "name": "_sym_decorrelation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Symmetric decorrelation\ni.e. W <- (W * W.T) ^{-1/2} * W" - }, - { - "name": "_ica_def", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Deflationary FastICA using fun approx to neg-entropy function\n\nUsed internally by FastICA." - }, - { - "name": "_ica_par", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Parallel FastICA.\n\nUsed internally by FastICA --main loop" - }, - { - "name": "_logcosh", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_exp", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_cube", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fastica", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components to extract. 
If None no dimension reduction is performed." - }, - { - "name": "algorithm", - "type": "Literal['parallel', 'deflation']", - "hasDefault": true, - "default": "'parallel'", - "limitation": null, - "ignored": false, - "docstring": "Apply a parallel or deflational FASTICA algorithm." - }, - { - "name": "whiten", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True perform an initial whitening of the data. If False, the data is assumed to have already been preprocessed: it should be centered, normed and white. Otherwise you will get incorrect results. In this case the parameter n_components will be ignored." - }, - { - "name": "fun", - "type": "Literal['logcosh', 'exp', 'cube']", - "hasDefault": true, - "default": "'logcosh'", - "limitation": null, - "ignored": false, - "docstring": "The functional form of the G function used in the approximation to neg-entropy. Could be either 'logcosh', 'exp', or 'cube'. You can also provide your own function. It should return a tuple containing the value of the function, and of its derivative, in the point. The derivative should be averaged along its last dimension. Example: def my_g(x): return x ** 3, np.mean(3 * x ** 2, axis=-1)" - }, - { - "name": "fun_args", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Arguments to send to the functional form. If empty or None and if fun='logcosh', fun_args will take value {'alpha' : 1.0}" - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-04", - "limitation": null, - "ignored": false, - "docstring": "A positive scalar giving the tolerance at which the un-mixing matrix is considered to have converged." 
- }, - { - "name": "w_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial un-mixing array of dimension (n.comp,n.comp). If None (default) then an array of normal r.v.'s is used." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to initialize ``w_init`` when not specified, with a normal distribution. Pass an int, for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "return_X_mean", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, X_mean is returned too." - }, - { - "name": "compute_sources", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, sources are not computed, but only the rotation matrix. This can save memory when working with big data. Defaults to True." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform Fast Independent Component Analysis.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\nn_components : int, default=None\n Number of components to extract. 
If None no dimension reduction\n is performed.\n\nalgorithm : {'parallel', 'deflation'}, default='parallel'\n Apply a parallel or deflational FASTICA algorithm.\n\nwhiten : bool, default=True\n If True perform an initial whitening of the data.\n If False, the data is assumed to have already been\n preprocessed: it should be centered, normed and white.\n Otherwise you will get incorrect results.\n In this case the parameter n_components will be ignored.\n\nfun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'\n The functional form of the G function used in the\n approximation to neg-entropy. Could be either 'logcosh', 'exp',\n or 'cube'.\n You can also provide your own function. It should return a tuple\n containing the value of the function, and of its derivative, in the\n point. The derivative should be averaged along its last dimension.\n Example:\n\n def my_g(x):\n return x ** 3, np.mean(3 * x ** 2, axis=-1)\n\nfun_args : dict, default=None\n Arguments to send to the functional form.\n If empty or None and if fun='logcosh', fun_args will take value\n {'alpha' : 1.0}\n\nmax_iter : int, default=200\n Maximum number of iterations to perform.\n\ntol : float, default=1e-04\n A positive scalar giving the tolerance at which the\n un-mixing matrix is considered to have converged.\n\nw_init : ndarray of shape (n_components, n_components), default=None\n Initial un-mixing array of dimension (n.comp,n.comp).\n If None (default) then an array of normal r.v.'s is used.\n\nrandom_state : int, RandomState instance or None, default=None\n Used to initialize ``w_init`` when not specified, with a\n normal distribution. Pass an int, for reproducible results\n across multiple function calls.\n See :term:`Glossary `.\n\nreturn_X_mean : bool, default=False\n If True, X_mean is returned too.\n\ncompute_sources : bool, default=True\n If False, sources are not computed, but only the rotation matrix.\n This can save memory when working with big data. 
Defaults to True.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\nK : ndarray of shape (n_components, n_features) or None\n If whiten is 'True', K is the pre-whitening matrix that projects data\n onto the first n_components principal components. If whiten is 'False',\n K is 'None'.\n\nW : ndarray of shape (n_components, n_components)\n The square matrix that unmixes the data after whitening.\n The mixing matrix is the pseudo-inverse of matrix ``W K``\n if K is not None, else it is the inverse of W.\n\nS : ndarray of shape (n_samples, n_components) or None\n Estimated source matrix\n\nX_mean : ndarray of shape (n_features,)\n The mean over features. Returned only if return_X_mean is True.\n\nn_iter : int\n If the algorithm is \"deflation\", n_iter is the\n maximum number of iterations run across all components. Else\n they are just the number of iterations taken to converge. This is\n returned only when return_n_iter is set to `True`.\n\nNotes\n-----\n\nThe data matrix X is considered to be a linear combination of\nnon-Gaussian (independent) components i.e. X = AS where columns of S\ncontain the independent components and A is a linear mixing\nmatrix. In short ICA attempts to `un-mix' the data by estimating an\nun-mixing matrix W where ``S = W K X.``\nWhile FastICA was proposed to estimate as many sources\nas features, it is possible to estimate less by setting\nn_components < n_features. It this case K is not a square matrix\nand the estimated A is the pseudo-inverse of ``W K``.\n\nThis implementation was originally made for data of shape\n[n_features, n_samples]. Now the input is transposed\nbefore the algorithm is applied. This makes it slightly\nfaster for Fortran-ordered input.\n\nImplemented using FastICA:\n*A. Hyvarinen and E. Oja, Independent Component Analysis:\nAlgorithms and Applications, Neural Networks, 13(4-5), 2000,\npp. 
411-430*" - } - ] - }, - { - "name": "sklearn.decomposition._incremental_pca", - "imports": [ - "import numpy as np", - "from scipy import linalg", - "from scipy import sparse", - "from _base import _BasePCA", - "from utils import gen_batches", - "from utils.extmath import svd_flip", - "from utils.extmath import _incremental_mean_and_var", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "IncrementalPCA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components to keep. If ``n_components`` is ``None``, then ``n_components`` is set to ``min(n_samples, n_features)``." - }, - { - "name": "whiten", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When True (False by default) the ``components_`` vectors are divided by ``n_samples`` times ``components_`` to ensure uncorrelated outputs with unit component-wise variances. Whitening will remove some information from the transformed signal (the relative variance scales of the components) but can sometimes improve the predictive accuracy of the downstream estimators by making data respect some hard-wired assumptions." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, X will be overwritten. ``copy=False`` can be used to save memory but is unsafe for general use." - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of samples to use for each batch. Only used when calling ``fit``. 
If ``batch_size`` is ``None``, then ``batch_size`` is inferred from the data and set to ``5 * n_features``, to provide a balance between approximation accuracy and memory consumption." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model with X, using minibatches of size batch_size.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Run check_array on X." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Incremental fit with X. 
All of X is processed as a single batch.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\ncheck_input : bool, default=True\n Run check_array on X.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data, where n_samples is the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply dimensionality reduction to X.\n\nX is projected on the first principal components previously extracted\nfrom a training set, using minibatches of size batch_size if X is\nsparse.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data, where n_samples is the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n\nExamples\n--------\n\n>>> import numpy as np\n>>> from sklearn.decomposition import IncrementalPCA\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2],\n... [1, 1], [2, 1], [3, 2]])\n>>> ipca = IncrementalPCA(n_components=2, batch_size=3)\n>>> ipca.fit(X)\nIncrementalPCA(batch_size=3, n_components=2)\n>>> ipca.transform(X) # doctest: +SKIP" - } - ], - "docstring": "Incremental principal components analysis (IPCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of\nthe data, keeping only the most significant singular vectors to\nproject the data to a lower dimensional space. 
The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nDepending on the size of the input data, this algorithm can be much more\nmemory efficient than a PCA, and allows sparse input.\n\nThis algorithm has constant memory complexity, on the order\nof ``batch_size * n_features``, enabling use of np.memmap files without\nloading the entire file into memory. For sparse matrices, the input\nis converted to dense in batches (in order to be able to subtract the\nmean) which avoids storing the entire dense matrix at any one time.\n\nThe computational overhead of each SVD is\n``O(batch_size * n_features ** 2)``, but only 2 * batch_size samples\nremain in memory at a time. There will be ``n_samples / batch_size`` SVD\ncomputations to get the principal components, versus 1 large SVD of\ncomplexity ``O(n_samples * n_features ** 2)`` for PCA.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16\n\nParameters\n----------\nn_components : int, default=None\n Number of components to keep. If ``n_components`` is ``None``,\n then ``n_components`` is set to ``min(n_samples, n_features)``.\n\nwhiten : bool, default=False\n When True (False by default) the ``components_`` vectors are divided\n by ``n_samples`` times ``components_`` to ensure uncorrelated outputs\n with unit component-wise variances.\n\n Whitening will remove some information from the transformed signal\n (the relative variance scales of the components) but can sometimes\n improve the predictive accuracy of the downstream estimators by\n making data respect some hard-wired assumptions.\n\ncopy : bool, default=True\n If False, X will be overwritten. ``copy=False`` can be used to\n save memory but is unsafe for general use.\n\nbatch_size : int, default=None\n The number of samples to use for each batch. Only used when calling\n ``fit``. 
If ``batch_size`` is ``None``, then ``batch_size``\n is inferred from the data and set to ``5 * n_features``, to provide a\n balance between approximation accuracy and memory consumption.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Components with maximum variance.\n\nexplained_variance_ : ndarray of shape (n_components,)\n Variance explained by each of the selected components.\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n If all components are stored, the sum of explained variances is equal\n to 1.0.\n\nsingular_values_ : ndarray of shape (n_components,)\n The singular values corresponding to each of the selected components.\n The singular values are equal to the 2-norms of the ``n_components``\n variables in the lower-dimensional space.\n\nmean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, aggregate over calls to ``partial_fit``.\n\nvar_ : ndarray of shape (n_features,)\n Per-feature empirical variance, aggregate over calls to\n ``partial_fit``.\n\nnoise_variance_ : float\n The estimated noise covariance following the Probabilistic PCA model\n from Tipping and Bishop 1999. See \"Pattern Recognition and\n Machine Learning\" by C. Bishop, 12.2.1 p. 574 or\n http://www.miketipping.com/papers/met-mppca.pdf.\n\nn_components_ : int\n The estimated number of components. Relevant when\n ``n_components=None``.\n\nn_samples_seen_ : int\n The number of samples processed by the estimator. 
Will be reset on\n new calls to fit, but increments across ``partial_fit`` calls.\n\nbatch_size_ : int\n Inferred batch size from ``batch_size``.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import IncrementalPCA\n>>> from scipy import sparse\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = IncrementalPCA(n_components=7, batch_size=200)\n>>> # either partially fit on smaller batches of data\n>>> transformer.partial_fit(X[:100, :])\nIncrementalPCA(batch_size=200, n_components=7)\n>>> # or let the fit function itself divide the data into batches\n>>> X_sparse = sparse.csr_matrix(X)\n>>> X_transformed = transformer.fit_transform(X_sparse)\n>>> X_transformed.shape\n(1797, 7)\n\nNotes\n-----\nImplements the incremental PCA model from:\n*D. Ross, J. Lim, R. Lin, M. Yang, Incremental Learning for Robust Visual\nTracking, International Journal of Computer Vision, Volume 77, Issue 1-3,\npp. 125-141, May 2008.*\nSee https://www.cs.toronto.edu/~dross/ivt/RossLimLinYang_ijcv.pdf\n\nThis model is an extension of the Sequential Karhunen-Loeve Transform from:\n*A. Levy and M. Lindenbaum, Sequential Karhunen-Loeve Basis Extraction and\nits Application to Images, IEEE Transactions on Image Processing, Volume 9,\nNumber 8, pp. 1371-1374, August 2000.*\nSee https://www.cs.technion.ac.il/~mic/doc/skl-ip.pdf\n\nWe have specifically abstained from an optimization used by authors of both\npapers, a QR decomposition used in specific situations to reduce the\nalgorithmic complexity of the SVD. The source for this technique is\n*Matrix Computations, Third Edition, G. Holub and C. Van Loan, Chapter 5,\nsection 5.4.4, pp 252-253.*. This technique has been omitted because it is\nadvantageous only when decomposing a matrix with ``n_samples`` (rows)\n>= 5/3 * ``n_features`` (columns), and hurts the readability of the\nimplemented algorithm. 
This would be a good opportunity for future\noptimization, if it is deemed necessary.\n\nReferences\n----------\nD. Ross, J. Lim, R. Lin, M. Yang. Incremental Learning for Robust Visual\nTracking, International Journal of Computer Vision, Volume 77,\nIssue 1-3, pp. 125-141, May 2008.\n\nG. Golub and C. Van Loan. Matrix Computations, Third Edition, Chapter 5,\nSection 5.4.4, pp. 252-253.\n\nSee Also\n--------\nPCA\nKernelPCA\nSparsePCA\nTruncatedSVD" - } - ], - "functions": [] - }, - { - "name": "sklearn.decomposition._kernel_pca", - "imports": [ - "import numpy as np", - "from scipy import linalg", - "from scipy.sparse.linalg import eigsh", - "from utils._arpack import _init_arpack_v0", - "from utils.extmath import svd_flip", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_psd_eigenvalues", - "from utils.deprecation import deprecated", - "from exceptions import NotFittedError", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from preprocessing import KernelCenterer", - "from metrics.pairwise import pairwise_kernels", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "KernelPCA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components. If None, all non-zero components are kept." - }, - { - "name": "kernel", - "type": "Literal['linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed']", - "hasDefault": true, - "default": "'linear'", - "limitation": null, - "ignored": false, - "docstring": "Kernel used for PCA." - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Kernel coefficient for rbf, poly and sigmoid kernels. 
Ignored by other kernels. If ``gamma`` is ``None``, then it is set to ``1/n_features``." - }, - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Degree for poly kernels. Ignored by other kernels." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Independent term in poly and sigmoid kernels. Ignored by other kernels." - }, - { - "name": "kernel_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters (keyword arguments) and values for kernel passed as callable object. Ignored by other kernels." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Hyperparameter of the ridge regression that learns the inverse transform (when fit_inverse_transform=True)." - }, - { - "name": "fit_inverse_transform", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Learn the inverse transform for non-precomputed kernels. (i.e. learn to find the pre-image of a point)" - }, - { - "name": "eigen_solver", - "type": "Literal['auto', 'dense', 'arpack']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Select eigensolver to use. If n_components is much less than the number of training samples, arpack may be more efficient than the dense eigensolver." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for arpack. If None, optimal value will be chosen by arpack." - }, - { - "name": "remove_zero_eig", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, then all components with zero eigenvalues are removed, so that the number of components in the output may be < n_components (and sometimes even zero due to numerical instability). When n_components is None, this parameter is ignored and components with zero eigenvalues are removed regardless." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``eigen_solver`` == 'arpack'. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `. .. versionadded:: 0.18" - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, input X is copied and stored by the model in the `X_fit_` attribute. If no further changes will be done to X, setting `copy_X=False` saves memory by storing a reference. .. versionadded:: 0.18" - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. 
versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit's using kernel K" - }, - { - "name": "_fit_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the instance itself." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model from data in X and transform X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)" - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X back to original space.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_components)\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_features)\n\nReferences\n----------\n\"Learning to Find Pre-Images\", G BakIr et al, 2004." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Kernel Principal component analysis (KPCA).\n\nNon-linear dimensionality reduction through the use of kernels (see\n:ref:`metrics`).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of components. 
If None, all non-zero components are kept.\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'}, default='linear'\n Kernel used for PCA.\n\ngamma : float, default=None\n Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other\n kernels. If ``gamma`` is ``None``, then it is set to ``1/n_features``.\n\ndegree : int, default=3\n Degree for poly kernels. Ignored by other kernels.\n\ncoef0 : float, default=1\n Independent term in poly and sigmoid kernels.\n Ignored by other kernels.\n\nkernel_params : dict, default=None\n Parameters (keyword arguments) and\n values for kernel passed as callable object.\n Ignored by other kernels.\n\nalpha : float, default=1.0\n Hyperparameter of the ridge regression that learns the\n inverse transform (when fit_inverse_transform=True).\n\nfit_inverse_transform : bool, default=False\n Learn the inverse transform for non-precomputed kernels.\n (i.e. learn to find the pre-image of a point)\n\neigen_solver : {'auto', 'dense', 'arpack'}, default='auto'\n Select eigensolver to use. If n_components is much less than\n the number of training samples, arpack may be more efficient\n than the dense eigensolver.\n\ntol : float, default=0\n Convergence tolerance for arpack.\n If 0, optimal value will be chosen by arpack.\n\nmax_iter : int, default=None\n Maximum number of iterations for arpack.\n If None, optimal value will be chosen by arpack.\n\nremove_zero_eig : bool, default=False\n If True, then all components with zero eigenvalues are removed, so\n that the number of components in the output may be < n_components\n (and sometimes even zero due to numerical instability).\n When n_components is None, this parameter is ignored and components\n with zero eigenvalues are removed regardless.\n\nrandom_state : int, RandomState instance or None, default=None\n Used when ``eigen_solver`` == 'arpack'. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\n .. 
versionadded:: 0.18\n\ncopy_X : bool, default=True\n If True, input X is copied and stored by the model in the `X_fit_`\n attribute. If no further changes will be done to X, setting\n `copy_X=False` saves memory by storing a reference.\n\n .. versionadded:: 0.18\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.18\n\nAttributes\n----------\nlambdas_ : ndarray of shape (n_components,)\n Eigenvalues of the centered kernel matrix in decreasing order.\n If `n_components` and `remove_zero_eig` are not set,\n then all values are stored.\n\nalphas_ : ndarray of shape (n_samples, n_components)\n Eigenvectors of the centered kernel matrix. If `n_components` and\n `remove_zero_eig` are not set, then all components are stored.\n\ndual_coef_ : ndarray of shape (n_samples, n_features)\n Inverse transform matrix. Only available when\n ``fit_inverse_transform`` is True.\n\nX_transformed_fit_ : ndarray of shape (n_samples, n_components)\n Projection of the fitted data on the kernel principal components.\n Only available when ``fit_inverse_transform`` is True.\n\nX_fit_ : ndarray of shape (n_samples, n_features)\n The data used to fit the model. If `copy_X=False`, then `X_fit_` is\n a reference. This attribute is used for the calls to transform.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import KernelPCA\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = KernelPCA(n_components=7, kernel='linear')\n>>> X_transformed = transformer.fit_transform(X)\n>>> X_transformed.shape\n(1797, 7)\n\nReferences\n----------\nKernel PCA was introduced in:\n Bernhard Schoelkopf, Alexander J. Smola,\n and Klaus-Robert Mueller. 1999. Kernel principal\n component analysis. In Advances in kernel methods,\n MIT Press, Cambridge, MA, USA 327-352." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.decomposition._lda", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "from scipy.special import gammaln", - "from scipy.special import logsumexp", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_random_state", - "from utils import gen_batches", - "from utils import gen_even_slices", - "from utils.validation import check_non_negative", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from _online_lda_fast import mean_change", - "from _online_lda_fast import _dirichlet_expectation_1d", - "from _online_lda_fast import _dirichlet_expectation_2d" - ], - "classes": [ - { - "name": "LatentDirichletAllocation", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of topics. .. versionchanged:: 0.19 ``n_topics`` was renamed to ``n_components``" - }, - { - "name": "doc_topic_prior", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prior of document topic distribution `theta`. If the value is None, defaults to `1 / n_components`. In [1]_, this is called `alpha`." - }, - { - "name": "topic_word_prior", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prior of topic word distribution `beta`. If the value is None, defaults to `1 / n_components`. In [1]_, this is called `eta`." 
- }, - { - "name": "learning_method", - "type": "Literal['batch', 'online']", - "hasDefault": true, - "default": "'batch'", - "limitation": null, - "ignored": false, - "docstring": "Method used to update `_component`. Only used in :meth:`fit` method. In general, if the data size is large, the online update will be much faster than the batch update. Valid options:: 'batch': Batch variational Bayes method. Use all training data in each EM update. Old `components_` will be overwritten in each iteration. 'online': Online variational Bayes method. In each EM update, use mini-batch of training data to update the ``components_`` variable incrementally. The learning rate is controlled by the ``learning_decay`` and the ``learning_offset`` parameters. .. versionchanged:: 0.20 The default learning method is now ``\"batch\"``." - }, - { - "name": "learning_decay", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "It is a parameter that control learning rate in the online learning method. The value should be set between (0.5, 1.0] to guarantee asymptotic convergence. When the value is 0.0 and batch_size is ``n_samples``, the update method is same as batch learning. In the literature, this is called kappa." - }, - { - "name": "learning_offset", - "type": "float", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "A (positive) parameter that downweights early iterations in online learning. It should be greater than 1.0. In the literature, this is called tau_0." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "128", - "limitation": null, - "ignored": false, - "docstring": "Number of documents to use in each EM iteration. Only used in online learning." 
- }, - { - "name": "evaluate_every", - "type": "int", - "hasDefault": true, - "default": "-1", - "limitation": null, - "ignored": false, - "docstring": "How often to evaluate perplexity. Only used in `fit` method. set it to 0 or negative number to not evaluate perplexity in training at all. Evaluating perplexity can help you check convergence in training process, but it will also increase total training time. Evaluating perplexity in every iteration might increase training time up to two-fold." - }, - { - "name": "total_samples", - "type": "int", - "hasDefault": true, - "default": "1e6", - "limitation": null, - "ignored": false, - "docstring": "Total number of documents. Only used in the :meth:`partial_fit` method." - }, - { - "name": "perp_tol", - "type": "float", - "hasDefault": true, - "default": "1e-1", - "limitation": null, - "ignored": false, - "docstring": "Perplexity tolerance in batch learning. Only used when ``evaluate_every`` is greater than 0." - }, - { - "name": "mean_change_tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Stopping tolerance for updating document topic distribution in E-step." - }, - { - "name": "max_doc_update_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Max number of iterations for updating document topic distribution in the E-step." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use in the E-step. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check model parameters." - }, - { - "name": "_init_latent_vars", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialize latent variables." - }, - { - "name": "_e_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - }, - { - "name": "cal_sstats", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter that indicate whether to calculate sufficient statistics or not. Set ``cal_sstats`` to True when we need to run M-step." - }, - { - "name": "random_init", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter that indicate whether to initialize document topic distribution randomly in the E-step. Set it to True in training steps." - }, - { - "name": "parallel", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pre-initialized instance of joblib.Parallel." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "E-step in EM update.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ncal_sstats : bool\n Parameter that indicate whether to calculate sufficient statistics\n or not. 
Set ``cal_sstats`` to True when we need to run M-step.\n\nrandom_init : bool\n Parameter that indicate whether to initialize document topic\n distribution randomly in the E-step. Set it to True in training\n steps.\n\nparallel : joblib.Parallel, default=None\n Pre-initialized instance of joblib.Parallel.\n\nReturns\n-------\n(doc_topic_distr, suff_stats) :\n `doc_topic_distr` is unnormalized topic distribution for each\n document. In the literature, this is called `gamma`.\n `suff_stats` is expected sufficient statistics for the M-step.\n When `cal_sstats == False`, it will be None." - }, - { - "name": "_em_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - }, - { - "name": "total_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Total number of documents. It is only used when batch_update is `False`." - }, - { - "name": "batch_update", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter that controls updating method. `True` for batch learning, `False` for online learning." - }, - { - "name": "parallel", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pre-initialized instance of joblib.Parallel" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "EM update for 1 iteration.\n\nupdate `_component` by batch VB or online VB.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ntotal_samples : int\n Total number of documents. 
It is only used when\n batch_update is `False`.\n\nbatch_update : bool\n Parameter that controls updating method.\n `True` for batch learning, `False` for online learning.\n\nparallel : joblib.Parallel, default=None\n Pre-initialized instance of joblib.Parallel\n\nReturns\n-------\ndoc_topic_distr : ndarray of shape (n_samples, n_components)\n Unnormalized document topic distribution." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_non_neg_array", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "check X format\n\ncheck X format and make sure no negative value in X.\n\nParameters\n----------\nX : array-like or sparse matrix" - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Online VB with Mini-Batch update.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ny : Ignored\n\nReturns\n-------\nself" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn model for the data X with variational Bayes method.\n\nWhen `learning_method` is 'online', use mini-batch update.\nOtherwise, use batch update.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ny : Ignored\n\nReturns\n-------\nself" - }, - { - "name": "_unnormalized_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform data X according to fitted model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\nReturns\n-------\ndoc_topic_distr : ndarray of shape (n_samples, n_components)\n Document topic distribution for X." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform data X according to the fitted model.\n\n .. versionchanged:: 0.18\n *doc_topic_distr* is now normalized\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\nReturns\n-------\ndoc_topic_distr : ndarray of shape (n_samples, n_components)\n Document topic distribution for X." 
- }, - { - "name": "_approx_bound", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - }, - { - "name": "doc_topic_distr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document topic distribution. In the literature, this is called gamma." - }, - { - "name": "sub_sampling", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Compensate for subsampling of documents. It is used in calculate bound in online learning." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the variational bound.\n\nEstimate the variational bound over \"all documents\" using only the\ndocuments passed in as X. Since log-likelihood of each word cannot\nbe computed directly, we use this bound to estimate it.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ndoc_topic_distr : ndarray of shape (n_samples, n_components)\n Document topic distribution. In the literature, this is called\n gamma.\n\nsub_sampling : bool, default=False\n Compensate for subsampling of documents.\n It is used in calculate bound in online learning.\n\nReturns\n-------\nscore : float" - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate approximate log-likelihood as score.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ny : Ignored\n\nReturns\n-------\nscore : float\n Use approximate bound as score." - }, - { - "name": "_perplexity_precomp_distr", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - }, - { - "name": "doc_topic_distr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document topic distribution. If it is None, it will be generated by applying transform on X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate approximate perplexity for data X with ability to accept\nprecomputed doc_topic_distr\n\nPerplexity is defined as exp(-1. * log-likelihood per word)\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ndoc_topic_distr : ndarray of shape (n_samples, n_components), default=None\n Document topic distribution.\n If it is None, it will be generated by applying transform on X.\n\nReturns\n-------\nscore : float\n Perplexity score." - }, - { - "name": "perplexity", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - }, - { - "name": "sub_sampling", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Do sub-sampling or not." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate approximate perplexity for data X.\n\nPerplexity is defined as exp(-1. * log-likelihood per word)\n\n.. versionchanged:: 0.19\n *doc_topic_distr* argument has been deprecated and is ignored\n because user no longer has access to unnormalized distribution\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\nsub_sampling : bool\n Do sub-sampling or not.\n\nReturns\n-------\nscore : float\n Perplexity score." - } - ], - "docstring": "Latent Dirichlet Allocation with online variational Bayes algorithm\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=10\n Number of topics.\n\n .. versionchanged:: 0.19\n ``n_topics`` was renamed to ``n_components``\n\ndoc_topic_prior : float, default=None\n Prior of document topic distribution `theta`. If the value is None,\n defaults to `1 / n_components`.\n In [1]_, this is called `alpha`.\n\ntopic_word_prior : float, default=None\n Prior of topic word distribution `beta`. If the value is None, defaults\n to `1 / n_components`.\n In [1]_, this is called `eta`.\n\nlearning_method : {'batch', 'online'}, default='batch'\n Method used to update `_component`. Only used in :meth:`fit` method.\n In general, if the data size is large, the online update will be much\n faster than the batch update.\n\n Valid options::\n\n 'batch': Batch variational Bayes method. Use all training data in\n each EM update.\n Old `components_` will be overwritten in each iteration.\n 'online': Online variational Bayes method. In each EM update, use\n mini-batch of training data to update the ``components_``\n variable incrementally. The learning rate is controlled by the\n ``learning_decay`` and the ``learning_offset`` parameters.\n\n .. 
versionchanged:: 0.20\n The default learning method is now ``\"batch\"``.\n\nlearning_decay : float, default=0.7\n It is a parameter that control learning rate in the online learning\n method. The value should be set between (0.5, 1.0] to guarantee\n asymptotic convergence. When the value is 0.0 and batch_size is\n ``n_samples``, the update method is same as batch learning. In the\n literature, this is called kappa.\n\nlearning_offset : float, default=10.\n A (positive) parameter that downweights early iterations in online\n learning. It should be greater than 1.0. In the literature, this is\n called tau_0.\n\nmax_iter : int, default=10\n The maximum number of iterations.\n\nbatch_size : int, default=128\n Number of documents to use in each EM iteration. Only used in online\n learning.\n\nevaluate_every : int, default=-1\n How often to evaluate perplexity. Only used in `fit` method.\n set it to 0 or negative number to not evaluate perplexity in\n training at all. Evaluating perplexity can help you check convergence\n in training process, but it will also increase total training time.\n Evaluating perplexity in every iteration might increase training time\n up to two-fold.\n\ntotal_samples : int, default=1e6\n Total number of documents. Only used in the :meth:`partial_fit` method.\n\nperp_tol : float, default=1e-1\n Perplexity tolerance in batch learning. Only used when\n ``evaluate_every`` is greater than 0.\n\nmean_change_tol : float, default=1e-3\n Stopping tolerance for updating document topic distribution in E-step.\n\nmax_doc_update_iter : int, default=100\n Max number of iterations for updating document topic distribution in\n the E-step.\n\nn_jobs : int, default=None\n The number of jobs to use in the E-step.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n Verbosity level.\n\nrandom_state : int, RandomState instance or None, default=None\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Variational parameters for topic word distribution. Since the complete\n conditional for topic word distribution is a Dirichlet,\n ``components_[i, j]`` can be viewed as pseudocount that represents the\n number of times word `j` was assigned to topic `i`.\n It can also be viewed as distribution over the words for each topic\n after normalization:\n ``model.components_ / model.components_.sum(axis=1)[:, np.newaxis]``.\n\nexp_dirichlet_component_ : ndarray of shape (n_components, n_features)\n Exponential value of expectation of log topic word distribution.\n In the literature, this is `exp(E[log(beta)])`.\n\nn_batch_iter_ : int\n Number of iterations of the EM step.\n\nn_iter_ : int\n Number of passes over the dataset.\n\nbound_ : float\n Final perplexity score on training set.\n\ndoc_topic_prior_ : float\n Prior of document topic distribution `theta`. If the value is None,\n it is `1 / n_components`.\n\nrandom_state_ : RandomState instance\n RandomState instance that is generated either from a seed, the random\n number generator or by `np.random`.\n\ntopic_word_prior_ : float\n Prior of topic word distribution `beta`. If the value is None, it is\n `1 / n_components`.\n\nExamples\n--------\n>>> from sklearn.decomposition import LatentDirichletAllocation\n>>> from sklearn.datasets import make_multilabel_classification\n>>> # This produces a feature matrix of token counts, similar to what\n>>> # CountVectorizer would produce on text.\n>>> X, _ = make_multilabel_classification(random_state=0)\n>>> lda = LatentDirichletAllocation(n_components=5,\n... 
random_state=0)\n>>> lda.fit(X)\nLatentDirichletAllocation(...)\n>>> # get topics for some given samples:\n>>> lda.transform(X[-2:])\narray([[0.00360392, 0.25499205, 0.0036211 , 0.64236448, 0.09541846],\n [0.15297572, 0.00362644, 0.44412786, 0.39568399, 0.003586 ]])\n\nReferences\n----------\n.. [1] \"Online Learning for Latent Dirichlet Allocation\", Matthew D.\n Hoffman, David M. Blei, Francis Bach, 2010\n\n[2] \"Stochastic Variational Inference\", Matthew D. Hoffman, David M. Blei,\n Chong Wang, John Paisley, 2013\n\n[3] Matthew D. Hoffman's onlineldavb code. Link:\n https://github.com/blei-lab/onlineldavb" - } - ], - "functions": [ - { - "name": "_update_doc_distribution", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - }, - { - "name": "exp_topic_word_distr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Exponential value of expectation of log topic word distribution. In the literature, this is `exp(E[log(beta)])`." - }, - { - "name": "doc_topic_prior", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prior of document topic distribution `theta`." - }, - { - "name": "max_iters", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Max number of iterations for updating document topic distribution in the E-step." - }, - { - "name": "mean_change_tol", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Stopping tolerance for updating document topic distribution in E-setp." 
- }, - { - "name": "cal_sstats", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter that indicate to calculate sufficient statistics or not. Set `cal_sstats` to `True` when we need to run M-step." - }, - { - "name": "random_state", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter that indicate how to initialize document topic distribution. Set `random_state` to None will initialize document topic distribution to a constant number." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "E-step: update document-topic distribution.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\nexp_topic_word_distr : ndarray of shape (n_topics, n_features)\n Exponential value of expectation of log topic word distribution.\n In the literature, this is `exp(E[log(beta)])`.\n\ndoc_topic_prior : float\n Prior of document topic distribution `theta`.\n\nmax_iters : int\n Max number of iterations for updating document topic distribution in\n the E-step.\n\nmean_change_tol : float\n Stopping tolerance for updating document topic distribution in E-setp.\n\ncal_sstats : bool\n Parameter that indicate to calculate sufficient statistics or not.\n Set `cal_sstats` to `True` when we need to run M-step.\n\nrandom_state : RandomState instance or None\n Parameter that indicate how to initialize document topic distribution.\n Set `random_state` to None will initialize document topic distribution\n to a constant number.\n\nReturns\n-------\n(doc_topic_distr, suff_stats) :\n `doc_topic_distr` is unnormalized topic distribution for each document.\n In the literature, this is `gamma`. we can calculate `E[log(theta)]`\n from it.\n `suff_stats` is expected sufficient statistics for the M-step.\n When `cal_sstats == False`, this will be None." 
- } - ] - }, - { - "name": "sklearn.decomposition._nmf", - "imports": [ - "import numbers", - "import numpy as np", - "import scipy.sparse as sp", - "import time", - "import warnings", - "from math import sqrt", - "from _cdnmf_fast import _update_cdnmf_fast", - "from _config import config_context", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from exceptions import ConvergenceWarning", - "from utils import check_random_state", - "from utils import check_array", - "from utils.extmath import randomized_svd", - "from utils.extmath import safe_sparse_dot", - "from utils.extmath import squared_norm", - "from utils.validation import check_is_fitted", - "from utils.validation import check_non_negative", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "NMF", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components, if n_components is not set all features are kept." - }, - { - "name": "init", - "type": "Literal['random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Method used to initialize the procedure. Default: None. Valid options: - `None`: 'nndsvd' if n_components <= min(n_samples, n_features), otherwise random. 
- `'random'`: non-negative random matrices, scaled with: sqrt(X.mean() / n_components) - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD) initialization (better for sparseness) - `'nndsvda'`: NNDSVD with zeros filled with the average of X (better when sparsity is not desired) - `'nndsvdar'` NNDSVD with zeros filled with small random values (generally faster, less accurate alternative to NNDSVDa for when sparsity is not desired) - `'custom'`: use custom matrices W and H" - }, - { - "name": "solver", - "type": "Literal['cd', 'mu']", - "hasDefault": true, - "default": "'cd'", - "limitation": null, - "ignored": false, - "docstring": "Numerical solver to use: 'cd' is a Coordinate Descent solver. 'mu' is a Multiplicative Update solver. .. versionadded:: 0.17 Coordinate Descent solver. .. versionadded:: 0.19 Multiplicative Update solver." - }, - { - "name": "beta_loss", - "type": "Literal['frobenius', 'kullback-leibler', 'itakura-saito']", - "hasDefault": true, - "default": "'frobenius'", - "limitation": null, - "ignored": false, - "docstring": "Beta divergence to be minimized, measuring the distance between X and the dot product WH. Note that values different from 'frobenius' (or 2) and 'kullback-leibler' (or 1) lead to significantly slower fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input matrix X cannot contain zeros. Used only in 'mu' solver. .. versionadded:: 0.19" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance of the stopping condition." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations before timing out." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for initialisation (when ``init`` == 'nndsvdar' or 'random'), and in Coordinate Descent. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the regularization terms. Set it to zero to have no regularization. .. versionadded:: 0.17 *alpha* used in the Coordinate Descent solver." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The regularization mixing parameter, with 0 <= l1_ratio <= 1. For l1_ratio = 0 the penalty is an elementwise L2 penalty (aka Frobenius Norm). For l1_ratio = 1 it is an elementwise L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2. .. versionadded:: 0.17 Regularization parameter *l1_ratio* used in the Coordinate Descent solver." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Whether to be verbose." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, randomize the order of coordinates in the CD solver. .. versionadded:: 0.17 *shuffle* parameter used in the Coordinate Descent solver." - }, - { - "name": "regularization", - "type": "Optional[Literal['both', 'components', 'transformation']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Select whether the regularization affects the components (H), the transformation (W), both or none of them. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix to be decomposed" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - }, - { - "name": "W", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If init='custom', it is used as initial guess for the solution." - }, - { - "name": "H", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If init='custom', it is used as initial guess for the solution." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Learn a NMF model for the data X and returns the transformed data.\n\nThis is more efficient than calling fit followed by transform.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data matrix to be decomposed\n\ny : Ignored\n\nW : array-like of shape (n_samples, n_components)\n If init='custom', it is used as initial guess for the solution.\n\nH : array-like of shape (n_components, n_features)\n If init='custom', it is used as initial guess for the solution.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n Transformed data." 
- }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix to be decomposed" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn a NMF model for the data X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data matrix to be decomposed\n\ny : Ignored\n\nReturns\n-------\nself" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix to be transformed by the model." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform the data X according to the fitted NMF model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data matrix to be transformed by the model.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n Transformed data." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "W", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Transformed data matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform data back to its original space.\n\nParameters\n----------\nW : {ndarray, sparse matrix} of shape (n_samples, n_components)\n Transformed data matrix.\n\nReturns\n-------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Data matrix of original shape.\n\n.. 
versionadded:: 0.18" - } - ], - "docstring": "Non-Negative Matrix Factorization (NMF).\n\nFind two non-negative matrices (W, H) whose product approximates the non-\nnegative matrix X. This factorization can be used for example for\ndimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n .. math::\n\n 0.5 * ||X - WH||_{Fro}^2 + alpha * l1_{ratio} * ||vec(W)||_1\n\n + alpha * l1_{ratio} * ||vec(H)||_1\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||W||_{Fro}^2\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nFor multiplicative-update ('mu') solver, the Frobenius norm\n(:math:`0.5 * ||X - WH||_{Fro}^2`) can be changed into another\nbeta-divergence loss, by changing the beta_loss parameter.\n\nThe objective function is minimized with an alternating minimization of W\nand H.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of components, if n_components is not set all features\n are kept.\n\ninit : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n Method used to initialize the procedure.\n Default: None.\n Valid options:\n\n - `None`: 'nndsvd' if n_components <= min(n_samples, n_features),\n otherwise random.\n\n - `'random'`: non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n - `'nndsvda'`: NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n - `'nndsvdar'` NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n - `'custom'`: use custom matrices W and H\n\nsolver : {'cd', 'mu'}, default='cd'\n Numerical solver to use:\n 'cd' is a 
Coordinate Descent solver.\n 'mu' is a Multiplicative Update solver.\n\n .. versionadded:: 0.17\n Coordinate Descent solver.\n\n .. versionadded:: 0.19\n Multiplicative Update solver.\n\nbeta_loss : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}, default='frobenius'\n Beta divergence to be minimized, measuring the distance between X\n and the dot product WH. Note that values different from 'frobenius'\n (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n matrix X cannot contain zeros. Used only in 'mu' solver.\n\n .. versionadded:: 0.19\n\ntol : float, default=1e-4\n Tolerance of the stopping condition.\n\nmax_iter : int, default=200\n Maximum number of iterations before timing out.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for initialisation (when ``init`` == 'nndsvdar' or\n 'random'), and in Coordinate Descent. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\nalpha : float, default=0.\n Constant that multiplies the regularization terms. Set it to zero to\n have no regularization.\n\n .. versionadded:: 0.17\n *alpha* used in the Coordinate Descent solver.\n\nl1_ratio : float, default=0.\n The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n For l1_ratio = 0 the penalty is an elementwise L2 penalty\n (aka Frobenius Norm).\n For l1_ratio = 1 it is an elementwise L1 penalty.\n For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\n .. versionadded:: 0.17\n Regularization parameter *l1_ratio* used in the Coordinate Descent\n solver.\n\nverbose : int, default=0\n Whether to be verbose.\n\nshuffle : bool, default=False\n If true, randomize the order of coordinates in the CD solver.\n\n .. 
versionadded:: 0.17\n *shuffle* parameter used in the Coordinate Descent solver.\n\nregularization : {'both', 'components', 'transformation', None}, default='both'\n Select whether the regularization affects the components (H), the\n transformation (W), both or none of them.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Factorization matrix, sometimes called 'dictionary'.\n\nn_components_ : int\n The number of components. It is same as the `n_components` parameter\n if it was given. Otherwise, it will be same as the number of\n features.\n\nreconstruction_err_ : float\n Frobenius norm of the matrix difference, or beta-divergence, between\n the training data ``X`` and the reconstructed data ``WH`` from\n the fitted model.\n\nn_iter_ : int\n Actual number of iterations.\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n>>> from sklearn.decomposition import NMF\n>>> model = NMF(n_components=2, init='random', random_state=0)\n>>> W = model.fit_transform(X)\n>>> H = model.components_\n\nReferences\n----------\nCichocki, Andrzej, and P. H. A. N. Anh-Huy. \"Fast local algorithms for\nlarge scale nonnegative matrix and tensor factorizations.\"\nIEICE transactions on fundamentals of electronics, communications and\ncomputer sciences 92.3: 708-721, 2009.\n\nFevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix\nfactorization with the beta-divergence. Neural Computation, 23(9)." - } - ], - "functions": [ - { - "name": "norm", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Vector for which to compute the norm." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Dot product-based Euclidean norm implementation.\n\nSee: http://fseoane.net/blog/2011/computing-the-vector-norm/\n\nParameters\n----------\nx : array-like\n Vector for which to compute the norm." - }, - { - "name": "trace_dot", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "First matrix." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Second matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Trace of np.dot(X, Y.T).\n\nParameters\n----------\nX : array-like\n First matrix.\nY : array-like\n Second matrix." - }, - { - "name": "_check_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_beta_divergence", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Union[ArrayLike, float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "W", - "type": "Union[ArrayLike, float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "H", - "type": "Union[ArrayLike, float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "beta", - "type": "Literal['frobenius', 'kullback-leibler', 'itakura-saito']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter of the beta-divergence. If beta == 2, this is half the Frobenius *squared* norm. If beta == 1, this is the generalized Kullback-Leibler divergence. If beta == 0, this is the Itakura-Saito divergence. 
Else, this is the general beta-divergence." - }, - { - "name": "square_root", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, return np.sqrt(2 * res) For beta == 2, it corresponds to the Frobenius norm." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the beta-divergence of X and dot(W, H).\n\nParameters\n----------\nX : float or array-like of shape (n_samples, n_features)\n\nW : float or array-like of shape (n_samples, n_components)\n\nH : float or array-like of shape (n_components, n_features)\n\nbeta : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}\n Parameter of the beta-divergence.\n If beta == 2, this is half the Frobenius *squared* norm.\n If beta == 1, this is the generalized Kullback-Leibler divergence.\n If beta == 0, this is the Itakura-Saito divergence.\n Else, this is the general beta-divergence.\n\nsquare_root : bool, default=False\n If True, return np.sqrt(2 * res)\n For beta == 2, it corresponds to the Frobenius norm.\n\nReturns\n-------\n res : float\n Beta divergence of X and np.dot(X, H)." - }, - { - "name": "_special_sparse_dot", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes np.dot(W, H), only where X is non zero." - }, - { - "name": "_compute_regularization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute L1 and L2 regularization coefficients for W and H." - }, - { - "name": "_check_string_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_beta_loss_to_float", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convert string beta_loss to float." 
- }, - { - "name": "_initialize_nmf", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix to be decomposed." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of components desired in the approximation." - }, - { - "name": "init", - "type": "Literal['random', 'nndsvd', 'nndsvda', 'nndsvdar']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Method used to initialize the procedure. Default: None. Valid options: - None: 'nndsvd' if n_components <= min(n_samples, n_features), otherwise 'random'. - 'random': non-negative random matrices, scaled with: sqrt(X.mean() / n_components) - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD) initialization (better for sparseness) - 'nndsvda': NNDSVD with zeros filled with the average of X (better when sparsity is not desired) - 'nndsvdar': NNDSVD with zeros filled with small random values (generally faster, less accurate alternative to NNDSVDa for when sparsity is not desired) - 'custom': use custom matrices W and H" - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Truncate all values less then this in output to zero." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``init`` == 'nndsvdar' or 'random'. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Algorithms for NMF initialization.\n\nComputes an initial guess for the non-negative\nrank k matrix approximation for X: X = WH.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix to be decomposed.\n\nn_components : int\n The number of components desired in the approximation.\n\ninit : {'random', 'nndsvd', 'nndsvda', 'nndsvdar'}, default=None\n Method used to initialize the procedure.\n Default: None.\n Valid options:\n\n - None: 'nndsvd' if n_components <= min(n_samples, n_features),\n otherwise 'random'.\n\n - 'random': non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n - 'nndsvda': NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n - 'nndsvdar': NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n - 'custom': use custom matrices W and H\n\neps : float, default=1e-6\n Truncate all values less then this in output to zero.\n\nrandom_state : int, RandomState instance or None, default=None\n Used when ``init`` == 'nndsvdar' or 'random'. Pass an int for\n reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nW : array-like of shape (n_samples, n_components)\n Initial guesses for solving X ~= WH.\n\nH : array-like of shape (n_components, n_features)\n Initial guesses for solving X ~= WH.\n\nReferences\n----------\nC. Boutsidis, E. 
Gallopoulos: SVD based initialization: A head start for\nnonnegative matrix factorization - Pattern Recognition, 2008\nhttp://tinyurl.com/nndsvd" - }, - { - "name": "_update_coordinate_descent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper function for _fit_coordinate_descent.\n\nUpdate W to minimize the objective function, iterating once over all\ncoordinates. By symmetry, to update H, one can call\n_update_coordinate_descent(X.T, Ht, W, ...)." - }, - { - "name": "_fit_coordinate_descent", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Constant matrix." - }, - { - "name": "W", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial guess for the solution." - }, - { - "name": "H", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial guess for the solution." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance of the stopping condition." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations before timing out." - }, - { - "name": "l1_reg_W", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L1 regularization parameter for W." - }, - { - "name": "l1_reg_H", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L1 regularization parameter for H." 
- }, - { - "name": "l2_reg_W", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L2 regularization parameter for W." - }, - { - "name": "l2_reg_H", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L2 regularization parameter for H." - }, - { - "name": "update_H", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to True, both W and H will be estimated from initial guesses. Set to False, only W will be estimated." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, randomize the order of coordinates in the CD solver." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to randomize the coordinates in the CD solver, when ``shuffle`` is set to ``True``. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Non-negative Matrix Factorization (NMF) with Coordinate Descent\n\nThe objective function is minimized with an alternating minimization of W\nand H. 
Each minimization is done with a cyclic (up to a permutation of the\nfeatures) Coordinate Descent.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Constant matrix.\n\nW : array-like of shape (n_samples, n_components)\n Initial guess for the solution.\n\nH : array-like of shape (n_components, n_features)\n Initial guess for the solution.\n\ntol : float, default=1e-4\n Tolerance of the stopping condition.\n\nmax_iter : int, default=200\n Maximum number of iterations before timing out.\n\nl1_reg_W : float, default=0.\n L1 regularization parameter for W.\n\nl1_reg_H : float, default=0.\n L1 regularization parameter for H.\n\nl2_reg_W : float, default=0.\n L2 regularization parameter for W.\n\nl2_reg_H : float, default=0.\n L2 regularization parameter for H.\n\nupdate_H : bool, default=True\n Set to True, both W and H will be estimated from initial guesses.\n Set to False, only W will be estimated.\n\nverbose : int, default=0\n The verbosity level.\n\nshuffle : bool, default=False\n If true, randomize the order of coordinates in the CD solver.\n\nrandom_state : int, RandomState instance or None, default=None\n Used to randomize the coordinates in the CD solver, when\n ``shuffle`` is set to ``True``. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n Solution to the non-negative least squares problem.\n\nH : ndarray of shape (n_components, n_features)\n Solution to the non-negative least squares problem.\n\nn_iter : int\n The number of iterations done by the algorithm.\n\nReferences\n----------\nCichocki, Andrzej, and Phan, Anh-Huy. \"Fast local algorithms for\nlarge scale nonnegative matrix and tensor factorizations.\"\nIEICE transactions on fundamentals of electronics, communications and\ncomputer sciences 92.3: 708-721, 2009." 
- }, - { - "name": "_multiplicative_update_w", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update W in Multiplicative Update NMF." - }, - { - "name": "_multiplicative_update_h", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update H in Multiplicative Update NMF." - }, - { - "name": "_fit_multiplicative_update", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Constant input matrix." - }, - { - "name": "W", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial guess for the solution." - }, - { - "name": "H", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial guess for the solution." - }, - { - "name": "beta_loss", - "type": "Literal['frobenius', 'kullback-leibler', 'itakura-saito']", - "hasDefault": true, - "default": "'frobenius'", - "limitation": null, - "ignored": false, - "docstring": "String must be in {'frobenius', 'kullback-leibler', 'itakura-saito'}. Beta divergence to be minimized, measuring the distance between X and the dot product WH. Note that values different from 'frobenius' (or 2) and 'kullback-leibler' (or 1) lead to significantly slower fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input matrix X cannot contain zeros." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance of the stopping condition." 
- }, - { - "name": "l1_reg_W", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L1 regularization parameter for W." - }, - { - "name": "l1_reg_H", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L1 regularization parameter for H." - }, - { - "name": "l2_reg_W", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L2 regularization parameter for W." - }, - { - "name": "l2_reg_H", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L2 regularization parameter for H." - }, - { - "name": "update_H", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to True, both W and H will be estimated from initial guesses. Set to False, only W will be estimated." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Non-negative Matrix Factorization with Multiplicative Update.\n\nThe objective function is _beta_divergence(X, WH) and is minimized with an\nalternating minimization of W and H. 
Each minimization is done with a\nMultiplicative Update.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Constant input matrix.\n\nW : array-like of shape (n_samples, n_components)\n Initial guess for the solution.\n\nH : array-like of shape (n_components, n_features)\n Initial guess for the solution.\n\nbeta_loss : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}, default='frobenius'\n String must be in {'frobenius', 'kullback-leibler', 'itakura-saito'}.\n Beta divergence to be minimized, measuring the distance between X\n and the dot product WH. Note that values different from 'frobenius'\n (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n matrix X cannot contain zeros.\n\nmax_iter : int, default=200\n Number of iterations.\n\ntol : float, default=1e-4\n Tolerance of the stopping condition.\n\nl1_reg_W : float, default=0.\n L1 regularization parameter for W.\n\nl1_reg_H : float, default=0.\n L1 regularization parameter for H.\n\nl2_reg_W : float, default=0.\n L2 regularization parameter for W.\n\nl2_reg_H : float, default=0.\n L2 regularization parameter for H.\n\nupdate_H : bool, default=True\n Set to True, both W and H will be estimated from initial guesses.\n Set to False, only W will be estimated.\n\nverbose : int, default=0\n The verbosity level.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n Solution to the non-negative least squares problem.\n\nH : ndarray of shape (n_components, n_features)\n Solution to the non-negative least squares problem.\n\nn_iter : int\n The number of iterations done by the algorithm.\n\nReferences\n----------\nFevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix\nfactorization with the beta-divergence. Neural Computation, 23(9)." 
- }, - { - "name": "non_negative_factorization", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Constant matrix." - }, - { - "name": "W", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If init='custom', it is used as initial guess for the solution." - }, - { - "name": "H", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If init='custom', it is used as initial guess for the solution. If update_H=False, it is used as a constant, to solve for W only." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components, if n_components is not set all features are kept." - }, - { - "name": "init", - "type": "Literal['random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Method used to initialize the procedure. Valid options: - None: 'nndsvd' if n_components < n_features, otherwise 'random'. - 'random': non-negative random matrices, scaled with: sqrt(X.mean() / n_components) - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD) initialization (better for sparseness) - 'nndsvda': NNDSVD with zeros filled with the average of X (better when sparsity is not desired) - 'nndsvdar': NNDSVD with zeros filled with small random values (generally faster, less accurate alternative to NNDSVDa for when sparsity is not desired) - 'custom': use custom matrices W and H if `update_H=True`. If `update_H=False`, then only custom matrix H is used. .. versionchanged:: 0.23 The default value of `init` changed from 'random' to None in 0.23." 
- }, - { - "name": "update_H", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to True, both W and H will be estimated from initial guesses. Set to False, only W will be estimated." - }, - { - "name": "solver", - "type": "Literal['cd', 'mu']", - "hasDefault": true, - "default": "'cd'", - "limitation": null, - "ignored": false, - "docstring": "Numerical solver to use: - 'cd' is a Coordinate Descent solver that uses Fast Hierarchical Alternating Least Squares (Fast HALS). - 'mu' is a Multiplicative Update solver. .. versionadded:: 0.17 Coordinate Descent solver. .. versionadded:: 0.19 Multiplicative Update solver." - }, - { - "name": "beta_loss", - "type": "Literal['frobenius', 'kullback-leibler', 'itakura-saito']", - "hasDefault": true, - "default": "'frobenius'", - "limitation": null, - "ignored": false, - "docstring": "Beta divergence to be minimized, measuring the distance between X and the dot product WH. Note that values different from 'frobenius' (or 2) and 'kullback-leibler' (or 1) lead to significantly slower fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input matrix X cannot contain zeros. Used only in 'mu' solver. .. versionadded:: 0.19" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance of the stopping condition." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations before timing out." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the regularization terms." 
- }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The regularization mixing parameter, with 0 <= l1_ratio <= 1. For l1_ratio = 0 the penalty is an elementwise L2 penalty (aka Frobenius Norm). For l1_ratio = 1 it is an elementwise L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2." - }, - { - "name": "regularization", - "type": "Literal['both', 'components', 'transformation']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Select whether the regularization affects the components (H), the transformation (W), both or none of them." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for NMF initialisation (when ``init`` == 'nndsvdar' or 'random'), and in Coordinate Descent. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, randomize the order of coordinates in the CD solver." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Non-negative Matrix Factorization (NMF).\n\nFind two non-negative matrices (W, H) whose product approximates the non-\nnegative matrix X. This factorization can be used for example for\ndimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n .. 
math::\n\n 0.5 * ||X - WH||_{Fro}^2 + alpha * l1_{ratio} * ||vec(W)||_1\n\n + alpha * l1_{ratio} * ||vec(H)||_1\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||W||_{Fro}^2\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nFor multiplicative-update ('mu') solver, the Frobenius norm\n:math:`(0.5 * ||X - WH||_{Fro}^2)` can be changed into another\nbeta-divergence loss, by changing the beta_loss parameter.\n\nThe objective function is minimized with an alternating minimization of W\nand H. If H is given and update_H=False, it solves for W only.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Constant matrix.\n\nW : array-like of shape (n_samples, n_components), default=None\n If init='custom', it is used as initial guess for the solution.\n\nH : array-like of shape (n_components, n_features), default=None\n If init='custom', it is used as initial guess for the solution.\n If update_H=False, it is used as a constant, to solve for W only.\n\nn_components : int, default=None\n Number of components, if n_components is not set all features\n are kept.\n\ninit : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n Method used to initialize the procedure.\n\n Valid options:\n\n - None: 'nndsvd' if n_components < n_features, otherwise 'random'.\n\n - 'random': non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n - 'nndsvda': NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n - 'nndsvdar': NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n - 'custom': use custom matrices W and H if `update_H=True`. 
If\n `update_H=False`, then only custom matrix H is used.\n\n .. versionchanged:: 0.23\n The default value of `init` changed from 'random' to None in 0.23.\n\nupdate_H : bool, default=True\n Set to True, both W and H will be estimated from initial guesses.\n Set to False, only W will be estimated.\n\nsolver : {'cd', 'mu'}, default='cd'\n Numerical solver to use:\n\n - 'cd' is a Coordinate Descent solver that uses Fast Hierarchical\n Alternating Least Squares (Fast HALS).\n\n - 'mu' is a Multiplicative Update solver.\n\n .. versionadded:: 0.17\n Coordinate Descent solver.\n\n .. versionadded:: 0.19\n Multiplicative Update solver.\n\nbeta_loss : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}, default='frobenius'\n Beta divergence to be minimized, measuring the distance between X\n and the dot product WH. Note that values different from 'frobenius'\n (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n matrix X cannot contain zeros. Used only in 'mu' solver.\n\n .. versionadded:: 0.19\n\ntol : float, default=1e-4\n Tolerance of the stopping condition.\n\nmax_iter : int, default=200\n Maximum number of iterations before timing out.\n\nalpha : float, default=0.\n Constant that multiplies the regularization terms.\n\nl1_ratio : float, default=0.\n The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n For l1_ratio = 0 the penalty is an elementwise L2 penalty\n (aka Frobenius Norm).\n For l1_ratio = 1 it is an elementwise L1 penalty.\n For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\nregularization : {'both', 'components', 'transformation'}, default=None\n Select whether the regularization affects the components (H), the\n transformation (W), both or none of them.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for NMF initialisation (when ``init`` == 'nndsvdar' or\n 'random'), and in Coordinate Descent. 
Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\nverbose : int, default=0\n The verbosity level.\n\nshuffle : bool, default=False\n If true, randomize the order of coordinates in the CD solver.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n Solution to the non-negative least squares problem.\n\nH : ndarray of shape (n_components, n_features)\n Solution to the non-negative least squares problem.\n\nn_iter : int\n Actual number of iterations.\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n>>> from sklearn.decomposition import non_negative_factorization\n>>> W, H, n_iter = non_negative_factorization(X, n_components=2,\n... init='random', random_state=0)\n\nReferences\n----------\nCichocki, Andrzej, and P. H. A. N. Anh-Huy. \"Fast local algorithms for\nlarge scale nonnegative matrix and tensor factorizations.\"\nIEICE transactions on fundamentals of electronics, communications and\ncomputer sciences 92.3: 708-721, 2009.\n\nFevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix\nfactorization with the beta-divergence. Neural Computation, 23(9)." 
- } - ] - }, - { - "name": "sklearn.decomposition._pca", - "imports": [ - "from math import log", - "from math import sqrt", - "import numbers", - "import numpy as np", - "from scipy import linalg", - "from scipy.special import gammaln", - "from scipy.sparse import issparse", - "from scipy.sparse.linalg import svds", - "from _base import _BasePCA", - "from utils import check_random_state", - "from utils._arpack import _init_arpack_v0", - "from utils.extmath import fast_logdet", - "from utils.extmath import randomized_svd", - "from utils.extmath import svd_flip", - "from utils.extmath import stable_cumsum", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "PCA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "Union[float, int, Literal['mle']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components to keep. if n_components is not set all components are kept:: n_components == min(n_samples, n_features) If ``n_components == 'mle'`` and ``svd_solver == 'full'``, Minka's MLE is used to guess the dimension. Use of ``n_components == 'mle'`` will interpret ``svd_solver == 'auto'`` as ``svd_solver == 'full'``. If ``0 < n_components < 1`` and ``svd_solver == 'full'``, select the number of components such that the amount of variance that needs to be explained is greater than the percentage specified by n_components. If ``svd_solver == 'arpack'``, the number of components must be strictly less than the minimum of n_features and n_samples. 
Hence, the None case results in:: n_components == min(n_samples, n_features) - 1" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, data passed to fit are overwritten and running fit(X).transform(X) will not yield the expected results, use fit_transform(X) instead." - }, - { - "name": "whiten", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When True (False by default) the `components_` vectors are multiplied by the square root of n_samples and then divided by the singular values to ensure uncorrelated outputs with unit component-wise variances. Whitening will remove some information from the transformed signal (the relative variance scales of the components) but can sometime improve the predictive accuracy of the downstream estimators by making their data respect some hard-wired assumptions." - }, - { - "name": "svd_solver", - "type": "Literal['auto', 'full', 'arpack', 'randomized']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If auto : The solver is selected by a default policy based on `X.shape` and `n_components`: if the input data is larger than 500x500 and the number of components to extract is lower than 80% of the smallest dimension of the data, then the more efficient 'randomized' method is enabled. Otherwise the exact full SVD is computed and optionally truncated afterwards. If full : run exact full SVD calling the standard LAPACK solver via `scipy.linalg.svd` and select the components by postprocessing If arpack : run SVD truncated to n_components calling ARPACK solver via `scipy.sparse.linalg.svds`. It requires strictly 0 < n_components < min(X.shape) If randomized : run randomized SVD by the method of Halko et al. .. 
versionadded:: 0.18.0" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for singular values computed by svd_solver == 'arpack'. Must be of range [0.0, infinity). .. versionadded:: 0.18.0" - }, - { - "name": "iterated_power", - "type": "Union[Literal['auto'], int]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations for the power method computed by svd_solver == 'randomized'. Must be of range [0, infinity). .. versionadded:: 0.18.0" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when the 'arpack' or 'randomized' solvers are used. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `. .. versionadded:: 0.18.0" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model with X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself." 
- }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model with X and apply the dimensionality reduction on X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Transformed values.\n\nNotes\n-----\nThis method returns a Fortran-ordered array. To convert it to a\nC-ordered array, use 'np.ascontiguousarray'." - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Dispatch to the right submethod depending on the chosen solver." - }, - { - "name": "_fit_full", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model by computing full SVD on X." - }, - { - "name": "_fit_truncated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model by computing truncated SVD (by ARPACK or randomized)\non X." - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the log-likelihood of each sample.\n\nSee. 
\"Pattern Recognition and Machine Learning\"\nby C. Bishop, 12.2.1 p. 574\nor http://www.miketipping.com/papers/met-mppca.pdf\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data.\n\nReturns\n-------\nll : ndarray of shape (n_samples,)\n Log-likelihood of each sample under the current model." - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the average log-likelihood of all samples.\n\nSee. \"Pattern Recognition and Machine Learning\"\nby C. Bishop, 12.2.1 p. 574\nor http://www.miketipping.com/papers/met-mppca.pdf\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data.\n\ny : Ignored\n\nReturns\n-------\nll : float\n Average log-likelihood of the samples under the current model." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Principal component analysis (PCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of the\ndata to project it to a lower dimensional space. The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nIt uses the LAPACK implementation of the full SVD or a randomized truncated\nSVD by the method of Halko et al. 2009, depending on the shape of the input\ndata and the number of components to extract.\n\nIt can also use the scipy.sparse.linalg ARPACK implementation of the\ntruncated SVD.\n\nNotice that this class does not support sparse input. 
See\n:class:`TruncatedSVD` for an alternative with sparse data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, float or 'mle', default=None\n Number of components to keep.\n if n_components is not set all components are kept::\n\n n_components == min(n_samples, n_features)\n\n If ``n_components == 'mle'`` and ``svd_solver == 'full'``, Minka's\n MLE is used to guess the dimension. Use of ``n_components == 'mle'``\n will interpret ``svd_solver == 'auto'`` as ``svd_solver == 'full'``.\n\n If ``0 < n_components < 1`` and ``svd_solver == 'full'``, select the\n number of components such that the amount of variance that needs to be\n explained is greater than the percentage specified by n_components.\n\n If ``svd_solver == 'arpack'``, the number of components must be\n strictly less than the minimum of n_features and n_samples.\n\n Hence, the None case results in::\n\n n_components == min(n_samples, n_features) - 1\n\ncopy : bool, default=True\n If False, data passed to fit are overwritten and running\n fit(X).transform(X) will not yield the expected results,\n use fit_transform(X) instead.\n\nwhiten : bool, default=False\n When True (False by default) the `components_` vectors are multiplied\n by the square root of n_samples and then divided by the singular values\n to ensure uncorrelated outputs with unit component-wise variances.\n\n Whitening will remove some information from the transformed signal\n (the relative variance scales of the components) but can sometime\n improve the predictive accuracy of the downstream estimators by\n making their data respect some hard-wired assumptions.\n\nsvd_solver : {'auto', 'full', 'arpack', 'randomized'}, default='auto'\n If auto :\n The solver is selected by a default policy based on `X.shape` and\n `n_components`: if the input data is larger than 500x500 and the\n number of components to extract is lower than 80% of the smallest\n dimension of the data, then the more efficient 
'randomized'\n method is enabled. Otherwise the exact full SVD is computed and\n optionally truncated afterwards.\n If full :\n run exact full SVD calling the standard LAPACK solver via\n `scipy.linalg.svd` and select the components by postprocessing\n If arpack :\n run SVD truncated to n_components calling ARPACK solver via\n `scipy.sparse.linalg.svds`. It requires strictly\n 0 < n_components < min(X.shape)\n If randomized :\n run randomized SVD by the method of Halko et al.\n\n .. versionadded:: 0.18.0\n\ntol : float, default=0.0\n Tolerance for singular values computed by svd_solver == 'arpack'.\n Must be of range [0.0, infinity).\n\n .. versionadded:: 0.18.0\n\niterated_power : int or 'auto', default='auto'\n Number of iterations for the power method computed by\n svd_solver == 'randomized'.\n Must be of range [0, infinity).\n\n .. versionadded:: 0.18.0\n\nrandom_state : int, RandomState instance or None, default=None\n Used when the 'arpack' or 'randomized' solvers are used. Pass an int\n for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n .. versionadded:: 0.18.0\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Principal axes in feature space, representing the directions of\n maximum variance in the data. The components are sorted by\n ``explained_variance_``.\n\nexplained_variance_ : ndarray of shape (n_components,)\n The amount of variance explained by each of the selected components.\n\n Equal to n_components largest eigenvalues\n of the covariance matrix of X.\n\n .. 
versionadded:: 0.18\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n\n If ``n_components`` is not set then all components are stored and the\n sum of the ratios is equal to 1.0.\n\nsingular_values_ : ndarray of shape (n_components,)\n The singular values corresponding to each of the selected components.\n The singular values are equal to the 2-norms of the ``n_components``\n variables in the lower-dimensional space.\n\n .. versionadded:: 0.19\n\nmean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, estimated from the training set.\n\n Equal to `X.mean(axis=0)`.\n\nn_components_ : int\n The estimated number of components. When n_components is set\n to 'mle' or a number between 0 and 1 (with svd_solver == 'full') this\n number is estimated from input data. Otherwise it equals the parameter\n n_components, or the lesser value of n_features and n_samples\n if n_components is None.\n\nn_features_ : int\n Number of features in the training data.\n\nn_samples_ : int\n Number of samples in the training data.\n\nnoise_variance_ : float\n The estimated noise covariance following the Probabilistic PCA model\n from Tipping and Bishop 1999. See \"Pattern Recognition and\n Machine Learning\" by C. Bishop, 12.2.1 p. 574 or\n http://www.miketipping.com/papers/met-mppca.pdf. It is required to\n compute the estimated data covariance and score samples.\n\n Equal to the average of (min(n_features, n_samples) - n_components)\n smallest eigenvalues of the covariance matrix of X.\n\nSee Also\n--------\nKernelPCA : Kernel Principal Component Analysis.\nSparsePCA : Sparse Principal Component Analysis.\nTruncatedSVD : Dimensionality reduction using truncated SVD.\nIncrementalPCA : Incremental Principal Component Analysis.\n\nReferences\n----------\nFor n_components == 'mle', this class uses the method of *Minka, T. P.\n\"Automatic choice of dimensionality for PCA\". In NIPS, pp. 
598-604*\n\nImplements the probabilistic PCA model from:\nTipping, M. E., and Bishop, C. M. (1999). \"Probabilistic principal\ncomponent analysis\". Journal of the Royal Statistical Society:\nSeries B (Statistical Methodology), 61(3), 611-622.\nvia the score and score_samples methods.\nSee http://www.miketipping.com/papers/met-mppca.pdf\n\nFor svd_solver == 'arpack', refer to `scipy.sparse.linalg.svds`.\n\nFor svd_solver == 'randomized', see:\n*Halko, N., Martinsson, P. G., and Tropp, J. A. (2011).\n\"Finding structure with randomness: Probabilistic algorithms for\nconstructing approximate matrix decompositions\".\nSIAM review, 53(2), 217-288.* and also\n*Martinsson, P. G., Rokhlin, V., and Tygert, M. (2011).\n\"A randomized algorithm for the decomposition of matrices\".\nApplied and Computational Harmonic Analysis, 30(1), 47-68.*\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.decomposition import PCA\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> pca = PCA(n_components=2)\n>>> pca.fit(X)\nPCA(n_components=2)\n>>> print(pca.explained_variance_ratio_)\n[0.9924... 0.0075...]\n>>> print(pca.singular_values_)\n[6.30061... 0.54980...]\n\n>>> pca = PCA(n_components=2, svd_solver='full')\n>>> pca.fit(X)\nPCA(n_components=2, svd_solver='full')\n>>> print(pca.explained_variance_ratio_)\n[0.9924... 0.00755...]\n>>> print(pca.singular_values_)\n[6.30061... 0.54980...]\n\n>>> pca = PCA(n_components=1, svd_solver='arpack')\n>>> pca.fit(X)\nPCA(n_components=1, svd_solver='arpack')\n>>> print(pca.explained_variance_ratio_)\n[0.99244...]\n>>> print(pca.singular_values_)\n[6.30061...]" - } - ], - "functions": [ - { - "name": "_assess_dimension", - "decorators": [], - "parameters": [ - { - "name": "spectrum", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data spectrum." 
- }, - { - "name": "rank", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Tested rank value. It should be strictly lower than n_features, otherwise the method isn't specified (division by zero in equation (31) from the paper)." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the log-likelihood of a rank ``rank`` dataset.\n\nThe dataset is assumed to be embedded in gaussian noise of shape(n,\ndimf) having spectrum ``spectrum``.\n\nParameters\n----------\nspectrum : ndarray of shape (n_features,)\n Data spectrum.\nrank : int\n Tested rank value. It should be strictly lower than n_features,\n otherwise the method isn't specified (division by zero in equation\n (31) from the paper).\nn_samples : int\n Number of samples.\n\nReturns\n-------\nll : float\n The log-likelihood.\n\nNotes\n-----\nThis implements the method of `Thomas P. Minka:\nAutomatic Choice of Dimensionality for PCA. NIPS 2000: 598-604`" - }, - { - "name": "_infer_dimension", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Infers the dimension of a dataset with a given spectrum.\n\nThe returned value will be in [1, n_features - 1]." 
- } - ] - }, - { - "name": "sklearn.decomposition._sparse_pca", - "imports": [ - "import numpy as np", - "from utils import check_random_state", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from linear_model import ridge_regression", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from _dict_learning import dict_learning", - "from _dict_learning import dict_learning_online" - ], - "classes": [ - { - "name": "SparsePCA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of sparse atoms to extract." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Sparsity controlling parameter. Higher values lead to sparser components." - }, - { - "name": "ridge_alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Amount of ridge shrinkage to apply in order to improve conditioning when calling the transform method." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-8", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for the stopping condition." 
- }, - { - "name": "method", - "type": "Literal['lars', 'cd']", - "hasDefault": true, - "default": "'lars'", - "limitation": null, - "ignored": false, - "docstring": "lars: uses the least angle regression method to solve the lasso problem (linear_model.lars_path) cd: uses the coordinate descent method to compute the Lasso solution (linear_model.Lasso). Lars will be faster if the estimated components are sparse." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "U_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial values for the loadings for warm restart scenarios." - }, - { - "name": "V_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial values for the components for warm restart scenarios." - }, - { - "name": "verbose", - "type": "Union[bool, int]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity; the higher, the more messages. Defaults to 0." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used during dictionary learning. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data to be transformed, must have the same number of features as the data used to train the model." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Least Squares projection of the data onto the sparse components.\n\nTo avoid instability issues in case the system is under-determined,\nregularization can be applied (Ridge regression) via the\n`ridge_alpha` parameter.\n\nNote that Sparse PCA components orthogonality is not enforced as in PCA\nhence one cannot use a simple linear projection.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Test data to be transformed, must have the same number of\n features as the data used to train the model.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Transformed data." 
- } - ], - "docstring": "Sparse Principal Components Analysis (SparsePCA).\n\nFinds the set of sparse components that can optimally reconstruct\nthe data. The amount of sparseness is controllable by the coefficient\nof the L1 penalty, given by the parameter alpha.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of sparse atoms to extract.\n\nalpha : float, default=1\n Sparsity controlling parameter. Higher values lead to sparser\n components.\n\nridge_alpha : float, default=0.01\n Amount of ridge shrinkage to apply in order to improve\n conditioning when calling the transform method.\n\nmax_iter : int, default=1000\n Maximum number of iterations to perform.\n\ntol : float, default=1e-8\n Tolerance for the stopping condition.\n\nmethod : {'lars', 'cd'}, default='lars'\n lars: uses the least angle regression method to solve the lasso problem\n (linear_model.lars_path)\n cd: uses the coordinate descent method to compute the\n Lasso solution (linear_model.Lasso). Lars will be faster if\n the estimated components are sparse.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nU_init : ndarray of shape (n_samples, n_components), default=None\n Initial values for the loadings for warm restart scenarios.\n\nV_init : ndarray of shape (n_components, n_features), default=None\n Initial values for the components for warm restart scenarios.\n\nverbose : int or bool, default=False\n Controls the verbosity; the higher, the more messages. Defaults to 0.\n\nrandom_state : int, RandomState instance or None, default=None\n Used during dictionary learning. 
Pass an int for reproducible results\n across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Sparse components extracted from the data.\n\nerror_ : ndarray\n Vector of errors at each iteration.\n\nn_components_ : int\n Estimated number of components.\n\n .. versionadded:: 0.23\n\nn_iter_ : int\n Number of iterations run.\n\nmean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, estimated from the training set.\n Equal to ``X.mean(axis=0)``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.decomposition import SparsePCA\n>>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)\n>>> transformer = SparsePCA(n_components=5, random_state=0)\n>>> transformer.fit(X)\nSparsePCA(...)\n>>> X_transformed = transformer.transform(X)\n>>> X_transformed.shape\n(200, 5)\n>>> # most values in the components_ are zero (sparsity)\n>>> np.mean(transformer.components_ == 0)\n0.9666...\n\nSee Also\n--------\nPCA\nMiniBatchSparsePCA\nDictionaryLearning" - }, - { - "name": "MiniBatchSparsePCA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "number of sparse atoms to extract" - }, - { - "name": "alpha", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Sparsity controlling parameter. Higher values lead to sparser components." - }, - { - "name": "ridge_alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Amount of ridge shrinkage to apply in order to improve conditioning when calling the transform method." 
- }, - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "number of iterations to perform for each mini batch" - }, - { - "name": "callback", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "callable that gets invoked every five iterations" - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "the number of features to take in each mini batch" - }, - { - "name": "verbose", - "type": "Union[bool, int]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity; the higher, the more messages. Defaults to 0." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to shuffle the data before splitting it in batches" - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "method", - "type": "Literal['lars', 'cd']", - "hasDefault": true, - "default": "'lars'", - "limitation": null, - "ignored": false, - "docstring": "lars: uses the least angle regression method to solve the lasso problem (linear_model.lars_path) cd: uses the coordinate descent method to compute the Lasso solution (linear_model.Lasso). Lars will be faster if the estimated components are sparse." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for random shuffling when ``shuffle`` is set to ``True``, during online dictionary learning. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself." - } - ], - "docstring": "Mini-batch Sparse Principal Components Analysis\n\nFinds the set of sparse components that can optimally reconstruct\nthe data. The amount of sparseness is controllable by the coefficient\nof the L1 penalty, given by the parameter alpha.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n number of sparse atoms to extract\n\nalpha : int, default=1\n Sparsity controlling parameter. 
Higher values lead to sparser\n components.\n\nridge_alpha : float, default=0.01\n Amount of ridge shrinkage to apply in order to improve\n conditioning when calling the transform method.\n\nn_iter : int, default=100\n number of iterations to perform for each mini batch\n\ncallback : callable, default=None\n callable that gets invoked every five iterations\n\nbatch_size : int, default=3\n the number of features to take in each mini batch\n\nverbose : int or bool, default=False\n Controls the verbosity; the higher, the more messages. Defaults to 0.\n\nshuffle : bool, default=True\n whether to shuffle the data before splitting it in batches\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nmethod : {'lars', 'cd'}, default='lars'\n lars: uses the least angle regression method to solve the lasso problem\n (linear_model.lars_path)\n cd: uses the coordinate descent method to compute the\n Lasso solution (linear_model.Lasso). Lars will be faster if\n the estimated components are sparse.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for random shuffling when ``shuffle`` is set to ``True``,\n during online dictionary learning. Pass an int for reproducible results\n across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Sparse components extracted from the data.\n\nn_components_ : int\n Estimated number of components.\n\n .. 
versionadded:: 0.23\n\nn_iter_ : int\n Number of iterations run.\n\nmean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, estimated from the training set.\n Equal to ``X.mean(axis=0)``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.decomposition import MiniBatchSparsePCA\n>>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)\n>>> transformer = MiniBatchSparsePCA(n_components=5, batch_size=50,\n... random_state=0)\n>>> transformer.fit(X)\nMiniBatchSparsePCA(...)\n>>> X_transformed = transformer.transform(X)\n>>> X_transformed.shape\n(200, 5)\n>>> # most values in the components_ are zero (sparsity)\n>>> np.mean(transformer.components_ == 0)\n0.94\n\nSee Also\n--------\nPCA\nSparsePCA\nDictionaryLearning" - } - ], - "functions": [] - }, - { - "name": "sklearn.decomposition._truncated_svd", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "from scipy.sparse.linalg import svds", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_array", - "from utils import check_random_state", - "from utils._arpack import _init_arpack_v0", - "from utils.extmath import randomized_svd", - "from utils.extmath import safe_sparse_dot", - "from utils.extmath import svd_flip", - "from utils.sparsefuncs import mean_variance_axis", - "from utils.validation import _deprecate_positional_args", - "from utils.validation import check_is_fitted" - ], - "classes": [ - { - "name": "TruncatedSVD", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Desired dimensionality of output data. Must be strictly less than the number of features. The default value is useful for visualisation. 
For LSA, a value of 100 is recommended." - }, - { - "name": "algorithm", - "type": "Literal['arpack', 'randomized']", - "hasDefault": true, - "default": "'randomized'", - "limitation": null, - "ignored": false, - "docstring": "SVD solver to use. Either \"arpack\" for the ARPACK wrapper in SciPy (scipy.sparse.linalg.svds), or \"randomized\" for the randomized algorithm due to Halko (2009)." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations for randomized SVD solver. Not used by ARPACK. The default is larger than the default in :func:`~sklearn.utils.extmath.randomized_svd` to handle sparse matrices that may have large slowly decaying spectrum." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used during randomized svd. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for ARPACK. 0 means machine precision. Ignored by randomized SVD solver." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit model on training data X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the transformer object." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit model to X and perform dimensionality reduction on X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : Ignored\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Reduced version of X. This will always be a dense array." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform dimensionality reduction on X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Reduced version of X. This will always be a dense array." 
- }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X back to its original space.\n\nReturns an array X_original whose transform would be X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_components)\n New data.\n\nReturns\n-------\nX_original : ndarray of shape (n_samples, n_features)\n Note that this is always a dense array." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dimensionality reduction using truncated SVD (aka LSA).\n\nThis transformer performs linear dimensionality reduction by means of\ntruncated singular value decomposition (SVD). Contrary to PCA, this\nestimator does not center the data before computing the singular value\ndecomposition. This means it can work with sparse matrices\nefficiently.\n\nIn particular, truncated SVD works on term count/tf-idf matrices as\nreturned by the vectorizers in :mod:`sklearn.feature_extraction.text`. In\nthat context, it is known as latent semantic analysis (LSA).\n\nThis estimator supports two algorithms: a fast randomized SVD solver, and\na \"naive\" algorithm that uses ARPACK as an eigensolver on `X * X.T` or\n`X.T * X`, whichever is more efficient.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=2\n Desired dimensionality of output data.\n Must be strictly less than the number of features.\n The default value is useful for visualisation. For LSA, a value of\n 100 is recommended.\n\nalgorithm : {'arpack', 'randomized'}, default='randomized'\n SVD solver to use. 
Either \"arpack\" for the ARPACK wrapper in SciPy\n (scipy.sparse.linalg.svds), or \"randomized\" for the randomized\n algorithm due to Halko (2009).\n\nn_iter : int, default=5\n Number of iterations for randomized SVD solver. Not used by ARPACK. The\n default is larger than the default in\n :func:`~sklearn.utils.extmath.randomized_svd` to handle sparse\n matrices that may have large slowly decaying spectrum.\n\nrandom_state : int, RandomState instance or None, default=None\n Used during randomized svd. Pass an int for reproducible results across\n multiple function calls.\n See :term:`Glossary `.\n\ntol : float, default=0.\n Tolerance for ARPACK. 0 means machine precision. Ignored by randomized\n SVD solver.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n\nexplained_variance_ : ndarray of shape (n_components,)\n The variance of the training samples transformed by a projection to\n each component.\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n\nsingular_values_ : ndarray od shape (n_components,)\n The singular values corresponding to each of the selected components.\n The singular values are equal to the 2-norms of the ``n_components``\n variables in the lower-dimensional space.\n\nExamples\n--------\n>>> from sklearn.decomposition import TruncatedSVD\n>>> from scipy.sparse import random as sparse_random\n>>> X = sparse_random(100, 100, density=0.01, format='csr',\n... random_state=42)\n>>> svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)\n>>> svd.fit(X)\nTruncatedSVD(n_components=5, n_iter=7, random_state=42)\n>>> print(svd.explained_variance_ratio_)\n[0.0646... 0.0633... 0.0639... 0.0535... 0.0406...]\n>>> print(svd.explained_variance_ratio_.sum())\n0.286...\n>>> print(svd.singular_values_)\n[1.553... 1.512... 1.510... 1.370... 
1.199...]\n\nSee Also\n--------\nPCA\n\nReferences\n----------\nFinding structure with randomness: Stochastic algorithms for constructing\napproximate matrix decompositions\nHalko, et al., 2009 (arXiv:909) https://arxiv.org/pdf/0909.4061.pdf\n\nNotes\n-----\nSVD suffers from a problem called \"sign indeterminacy\", which means the\nsign of the ``components_`` and the output from transform depend on the\nalgorithm and random state. To work around this, fit instances of this\nclass to data once, then keep the instance around to do transformations." - } - ], - "functions": [] - }, - { - "name": "sklearn.decomposition", - "imports": [ - "from _nmf import NMF", - "from _nmf import non_negative_factorization", - "from _pca import PCA", - "from _incremental_pca import IncrementalPCA", - "from _kernel_pca import KernelPCA", - "from _sparse_pca import SparsePCA", - "from _sparse_pca import MiniBatchSparsePCA", - "from _truncated_svd import TruncatedSVD", - "from _fastica import FastICA", - "from _fastica import fastica", - "from _dict_learning import dict_learning", - "from _dict_learning import dict_learning_online", - "from _dict_learning import sparse_encode", - "from _dict_learning import DictionaryLearning", - "from _dict_learning import MiniBatchDictionaryLearning", - "from _dict_learning import SparseCoder", - "from _factor_analysis import FactorAnalysis", - "from utils.extmath import randomized_svd", - "from _lda import LatentDirichletAllocation" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.decomposition.tests.test_dict_learning", - "imports": [ - "import pytest", - "import numpy as np", - "from functools import partial", - "import itertools", - "from sklearn.base import clone", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.utils import check_array", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import 
ignore_warnings", - "from sklearn.utils._testing import TempMemmap", - "from sklearn.decomposition import DictionaryLearning", - "from sklearn.decomposition import MiniBatchDictionaryLearning", - "from sklearn.decomposition import SparseCoder", - "from sklearn.decomposition import dict_learning", - "from sklearn.decomposition import dict_learning_online", - "from sklearn.decomposition import sparse_encode", - "from sklearn.utils.estimator_checks import check_transformer_data_not_an_array", - "from sklearn.utils.estimator_checks import check_transformer_general", - "from sklearn.utils.estimator_checks import check_transformers_unfitted", - "from io import StringIO", - "import sys" - ], - "classes": [], - "functions": [ - { - "name": "test_sparse_encode_shapes_omp", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_overcomplete", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_lars_positive_parameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_positivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_lars_dict_positivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_lars_code_positivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "test_dict_learning_reconstruction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_reconstruction_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_lassocd_readonly_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_nonzero_coefs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_unknown_fit_algorithm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_lars_positive_parameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_dictionary_learning_positivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_dictionary_learning_lars", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_positivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_verbosity", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_estimator_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_overcomplete", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_initialization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_readonly_initialization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_iter_offset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_encode_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_encode_positivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_encode_unavailable_positivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_encode_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_encode_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_encode_error_default_sparsity", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unknown_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_coder_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_coder_estimator_clone", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_coder_parallel_mmap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_coder_common_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_coder_deprecation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_coder_n_features_in", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_factor_analysis", - "imports": [ - "from itertools import combinations", - "import numpy as np", - "import pytest", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.decomposition import FactorAnalysis", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.decomposition._factor_analysis import _ortho_rotation" - ], - "classes": [], - "functions": [ - { - "name": "test_factor_analysis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_fastica", - "imports": [ - "import itertools", - "import warnings", - "import pytest", - "import numpy as np", - "from scipy import stats", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.decomposition import FastICA", - "from sklearn.decomposition import fastica", - "from sklearn.decomposition import PCA", - "from sklearn.decomposition._fastica import _gs_decorrelation", - "from sklearn.exceptions import ConvergenceWarning" - ], - "classes": [], - "functions": [ - { - "name": "center_and_norm", - "decorators": [], - "parameters": [ - { - "name": "x: ndarray", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array with an axis of observations (statistical units) measured on random variables." - }, - { - "name": "axis: int", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axis along which the mean and variance are calculated." - }, - { - "name": "optional", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axis along which the mean and variance are calculated." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Centers and norms x **in place**\n\nParameters\n-----------\nx: ndarray\n Array with an axis of observations (statistical units) measured on\n random variables.\naxis: int, optional\n Axis along which the mean and variance are calculated." 
- }, - { - "name": "test_gs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fastica_simple", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fastica_nowhiten", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fastica_convergence_fail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_square_fastica", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fastica_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fastica_output_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_incremental_pca", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn import datasets", - "from sklearn.decomposition import PCA", - "from sklearn.decomposition import IncrementalPCA", - "from scipy import sparse" - ], - "classes": [], - "functions": [ - { - "name": "test_incremental_pca", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_check_projection", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_num_features_change", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_batch_signs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_batch_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_batch_rank", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_against_pca_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_incremental_pca_against_pca_random_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_explained_variances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_singular_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_whitening", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_partial_fit_float_division", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_fit_overflow_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_kernel_pca", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "import pytest", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.decomposition import PCA", - "from sklearn.decomposition import KernelPCA", - "from sklearn.datasets import make_circles", - "from sklearn.datasets import make_blobs", - "from sklearn.linear_model import Perceptron", - "from sklearn.pipeline import Pipeline", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.metrics.pairwise import rbf_kernel", - "from sklearn.utils.validation import _check_psd_eigenvalues" - ], - "classes": [], - "functions": [ - { - "name": "test_kernel_pca", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_kernel_pca_invalid_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_pca_consistent_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_pca_deterministic_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_pca_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_pca_linear_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_pca_n_components", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_remove_zero_eig", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_leave_zero_eig", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "This test checks that fit().transform() returns the same result as\nfit_transform() in case of non-removed zero eigenvalue.\nNon-regression test for issue #12141 (PR #12143)" - }, - { - "name": "test_kernel_pca_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_pca_invalid_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearch_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearch_pipeline_precomputed", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nested_circles", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_conditioning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that ``_check_psd_eigenvalues`` is correctly called\nNon-regression test for issue #12140 (PR #12145)" - }, - { - "name": "test_kernel_pca_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_32_64_decomposition_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that the decomposition is similar for 32 and 64 bits data " - }, - { - "name": "test_kernel_pcc_pairwise_is_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_nmf", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "from scipy import linalg", - "from sklearn.decomposition import NMF", - "from sklearn.decomposition import non_negative_factorization", - "from sklearn.decomposition import _nmf as nmf", - "from scipy.sparse import csc_matrix", - "import pytest", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils.extmath import squared_norm", - "from sklearn.base import clone", - "from sklearn.exceptions import ConvergenceWarning" - ], - "classes": [], - "functions": [ - { - "name": 
"test_convergence_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_initialize_nn_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parameter_checking", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_initialize_close", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_initialize_variants", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_fit_nn_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_fit_close", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_transform_custom_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components_greater_n_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_sparse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_non_negative_factorization_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_negative_factorization_checking", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_beta_divergence_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the beta-divergence of X and W.H for dense array only.\n\nUsed as a reference for testing nmf._beta_divergence." - }, - { - "name": "test_beta_divergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_special_sparse_dot", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_multiplicative_update_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_negative_beta_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_regularization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_decreasing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_underflow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_dtype_match", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_float32_float64_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_custom_init_dtype_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_default_deprecation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_online_lda", - "imports": [ - "import sys", - "import numpy as np", - "from scipy.linalg import block_diag", - "from scipy.sparse import csr_matrix", - "from scipy.special import psi", - "import pytest", - "from sklearn.decomposition import LatentDirichletAllocation", - "from sklearn.decomposition._lda import _dirichlet_expectation_1d", - "from sklearn.decomposition._lda import _dirichlet_expectation_2d", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import if_safe_multiprocessing_with_blas", - "from sklearn.exceptions import NotFittedError", - "from io import StringIO" - ], - "classes": [], - "functions": [ - { - "name": "_build_sparse_mtx", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_default_prior_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_fit_batch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_fit_online", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_lda_dense_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_negative_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_no_component_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_multi_jobs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_partial_fit_multi_jobs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_preplexity_mismatch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_perplexity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_perplexity_input_format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_score_perplexity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { 
- "name": "test_lda_fit_perplexity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_empty_docs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test LDA on empty document (all-zero rows)." - }, - { - "name": "test_dirichlet_expectation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test Cython version of Dirichlet expectation calculation." - }, - { - "name": "check_verbosity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_verbosity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_pca", - "imports": [ - "import numpy as np", - "import scipy as sp", - "import pytest", - "from sklearn.utils._testing import assert_allclose", - "from sklearn import datasets", - "from sklearn.decomposition import PCA", - "from sklearn.datasets import load_iris", - "from sklearn.decomposition._pca import _assess_dimension", - "from sklearn.decomposition._pca import _infer_dimension" - ], - "classes": [], - "functions": [ - { - "name": "test_pca", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_empty_slice_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_whitening", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_explained_variance_equivalence_solver", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_pca_explained_variance_empirical", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_singular_values_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_singular_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_check_projection", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_check_projection_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components_mle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components_mle_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_dim", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_infer_dim_1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_infer_dim_2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - 
}, - { - "name": "test_infer_dim_3", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_infer_dim_by_explained_variance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_score3", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_sanity_noise_variance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_score_consistency_solvers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_zero_noise_variance_edge_cases", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_svd_solver_auto", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_bad_solver", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_deterministic_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_dtype_preservation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pca_float_dtype_preservation", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pca_int_dtype_upcast_to_double", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_n_components_mostly_explained_variance_ratio", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_assess_dimension_bad_rank", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_small_eigenvalues_mle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mle_redundant_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_mle_too_few_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mle_simple_case", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_assess_dimesion_rank_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_sparse_pca", - "imports": [ - "import sys", - "import pytest", - "import numpy as np", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import if_safe_multiprocessing_with_blas", - "from sklearn.decomposition import SparsePCA", - "from sklearn.decomposition import MiniBatchSparsePCA", - "from sklearn.decomposition import PCA", - "from sklearn.utils import check_random_state", - "import joblib" - ], - "classes": [], - "functions": [ - { - "name": 
"generate_toy_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_correct_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_transform_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_transform_tall", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_initialization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mini_batch_correct_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mini_batch_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaling_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_vs_spca", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spca_n_components_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_truncated_svd", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "import pytest", - 
"from sklearn.decomposition import TruncatedSVD", - "from sklearn.decomposition import PCA", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_array_less", - "from sklearn.utils._testing import assert_allclose" - ], - "classes": [], - "functions": [ - { - "name": "X_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_solvers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_too_many_components", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_formats", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_integers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_explained_variance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_explained_variance_components_10_20", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_singular_values_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_singular_values_expected", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_truncated_svd_eq_pca", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.ensemble.setup", - "imports": [ - "import numpy", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._bagging", - "imports": [ - "import itertools", - "import numbers", - "import numpy as np", - "from abc import ABCMeta", - "from abc import abstractmethod", - "from warnings import warn", - "from joblib import Parallel", - "from _base import BaseEnsemble", - "from _base import _partition_estimators", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from metrics import r2_score", - "from metrics import accuracy_score", - "from tree import DecisionTreeClassifier", - "from tree import DecisionTreeRegressor", - "from utils import check_random_state", - "from utils import check_array", - "from utils import column_or_1d", - "from utils import indices_to_mask", - "from utils.metaestimators import if_delegate_has_method", - "from utils.multiclass import check_classification_targets", - "from utils.random import sample_without_replacement", - "from utils.validation import has_fit_parameter", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed" - ], - "classes": [ - { - "name": "BaseBagging", - "decorators": [], - 
"superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrices are accepted only if they are supported by the base estimator." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels in classification, real numbers in regression)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Note that this is supported only if the base estimator supports sample weighting." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a Bagging ensemble of estimators from the training\n set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\ny : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, then samples are equally weighted.\n Note that this is supported only if the base estimator supports\n sample weighting.\n\nReturns\n-------\nself : object" - }, - { - "name": "_parallel_args", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrices are accepted only if they are supported by the base estimator." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels in classification, real numbers in regression)." - }, - { - "name": "max_samples", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to use instead of self.max_samples." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Override value used when constructing base estimator. Only supported if the base estimator has a max_depth parameter." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Note that this is supported only if the base estimator supports sample weighting." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a Bagging ensemble of estimators from the training\n set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. 
Sparse matrices are accepted only if\n they are supported by the base estimator.\n\ny : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\nmax_samples : int or float, default=None\n Argument to use instead of self.max_samples.\n\nmax_depth : int, default=None\n Override value used when constructing base estimator. Only\n supported if the base estimator has a max_depth parameter.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if the base estimator supports\n sample weighting.\n\nReturns\n-------\nself : object" - }, - { - "name": "_set_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate out of bag predictions and score." - }, - { - "name": "_validate_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_estimators_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "estimators_samples_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The subset of drawn samples for each base estimator.\n\nReturns a dynamically generated list of indices identifying\nthe samples used for fitting each member of the ensemble, i.e.,\nthe in-bag samples.\n\nNote: the list is re-created at each call to the property in order\nto reduce the object memory footprint by not storing the sampling\ndata. Thus fetching the property may be slower than expected." - } - ], - "docstring": "Base class for Bagging meta-estimator.\n\nWarning: This class should not be used directly. Use derived classes\ninstead." 
- }, - { - "name": "BaggingClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base estimator to fit on random subsets of the dataset. If None, then the base estimator is a :class:`~sklearn.tree.DecisionTreeClassifier`." - }, - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The number of base estimators in the ensemble." - }, - { - "name": "max_samples", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of samples to draw from X to train each base estimator (with replacement by default, see `bootstrap` for more details). - If int, then draw `max_samples` samples. - If float, then draw `max_samples * X.shape[0]` samples." - }, - { - "name": "max_features", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of features to draw from X to train each base estimator ( without replacement by default, see `bootstrap_features` for more details). - If int, then draw `max_features` features. - If float, then draw `max_features * X.shape[1]` features." - }, - { - "name": "bootstrap", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether samples are drawn with replacement. If False, sampling without replacement is performed." - }, - { - "name": "bootstrap_features", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether features are drawn with replacement." 
- }, - { - "name": "oob_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use out-of-bag samples to estimate the generalization error." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new ensemble. See :term:`the Glossary `. .. versionadded:: 0.17 *warm_start* constructor parameter." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel for both :meth:`fit` and :meth:`predict`. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the random resampling of the original dataset (sample wise and feature wise). If the base estimator accepts a `random_state` attribute, a different seed is generated for each instance in the ensemble. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity when fitting and predicting." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the estimator and set the base_estimator_ attribute." 
- }, - { - "name": "_set_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrices are accepted only if they are supported by the base estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class for X.\n\nThe predicted class of an input sample is computed as the class with\nthe highest mean predicted probability. If base estimators do not\nimplement a ``predict_proba`` method, then it resorts to voting.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted classes." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrices are accepted only if they are supported by the base estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample is computed as\nthe mean predicted class probabilities of the base estimators in the\nensemble. 
If base estimators do not implement a ``predict_proba``\nmethod, then it resorts to voting and the predicted class probabilities\nof an input sample represents the proportion of estimators predicting\neach class.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrices are accepted only if they are supported by the base estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class log-probabilities for X.\n\nThe predicted class log-probabilities of an input sample is computed as\nthe log of the mean predicted class probabilities of the base\nestimators in the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class log-probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrices are accepted only if they are supported by the base estimator." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Average of the decision functions of the base classifiers.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\nReturns\n-------\nscore : ndarray of shape (n_samples, k)\n The decision function of the input samples. The columns correspond\n to the classes in sorted order, as they appear in the attribute\n ``classes_``. Regression and binary classification are special\n cases with ``k == 1``, otherwise ``k==n_classes``." - } - ], - "docstring": "A Bagging classifier.\n\nA Bagging classifier is an ensemble meta-estimator that fits base\nclassifiers each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.15\n\nParameters\n----------\nbase_estimator : object, default=None\n The base estimator to fit on random subsets of the dataset.\n If None, then the base estimator is a\n :class:`~sklearn.tree.DecisionTreeClassifier`.\n\nn_estimators : int, default=10\n The number of base estimators in the ensemble.\n\nmax_samples : int or float, default=1.0\n The number of samples to draw from X to train each base estimator (with\n replacement by default, see `bootstrap` for more details).\n\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples.\n\nmax_features : int or float, default=1.0\n The number of features to draw from X to train each base estimator (\n without replacement by default, see `bootstrap_features` for more\n details).\n\n - If int, then draw `max_features` features.\n - If float, then draw `max_features * X.shape[1]` features.\n\nbootstrap : bool, default=True\n Whether samples are drawn with replacement. If False, sampling\n without replacement is performed.\n\nbootstrap_features : bool, default=False\n Whether features are drawn with replacement.\n\noob_score : bool, default=False\n Whether to use out-of-bag samples to estimate\n the generalization error.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit\n a whole new ensemble. See :term:`the Glossary `.\n\n .. versionadded:: 0.17\n *warm_start* constructor parameter.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for both :meth:`fit` and\n :meth:`predict`. ``None`` means 1 unless in a\n :obj:`joblib.parallel_backend` context. ``-1`` means using all\n processors. 
See :term:`Glossary ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random resampling of the original dataset\n (sample wise and feature wise).\n If the base estimator accepts a `random_state` attribute, a different\n seed is generated for each instance in the ensemble.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nAttributes\n----------\nbase_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\nn_features_ : int\n The number of features when :meth:`fit` is performed.\n\nestimators_ : list of estimators\n The collection of fitted base estimators.\n\nestimators_samples_ : list of arrays\n The subset of drawn samples (i.e., the in-bag samples) for each base\n estimator. Each subset is defined by an array of the indices selected.\n\nestimators_features_ : list of arrays\n The subset of drawn features for each base estimator.\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\nn_classes_ : int or list\n The number of classes.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_decision_function_ : ndarray of shape (n_samples, n_classes)\n Decision function computed with out-of-bag estimate on the training\n set. If n_estimators is small it might be possible that a data point\n was never left out during the bootstrap. In this case,\n `oob_decision_function_` might contain NaN. This attribute exists\n only when ``oob_score`` is True.\n\nExamples\n--------\n>>> from sklearn.svm import SVC\n>>> from sklearn.ensemble import BaggingClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=100, n_features=4,\n... n_informative=2, n_redundant=0,\n... 
random_state=0, shuffle=False)\n>>> clf = BaggingClassifier(base_estimator=SVC(),\n... n_estimators=10, random_state=0).fit(X, y)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])\n\nReferences\n----------\n\n.. [1] L. Breiman, \"Pasting small votes for classification in large\n databases and on-line\", Machine Learning, 36(1), 85-103, 1999.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning, 24(2), 123-140,\n 1996.\n\n.. [3] T. Ho, \"The random subspace method for constructing decision\n forests\", Pattern Analysis and Machine Intelligence, 20(8), 832-844,\n 1998.\n\n.. [4] G. Louppe and P. Geurts, \"Ensembles on Random Patches\", Machine\n Learning and Knowledge Discovery in Databases, 346-361, 2012." - }, - { - "name": "BaggingRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base estimator to fit on random subsets of the dataset. If None, then the base estimator is a :class:`~sklearn.tree.DecisionTreeRegressor`." - }, - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The number of base estimators in the ensemble." - }, - { - "name": "max_samples", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of samples to draw from X to train each base estimator (with replacement by default, see `bootstrap` for more details). - If int, then draw `max_samples` samples. - If float, then draw `max_samples * X.shape[0]` samples." 
- }, - { - "name": "max_features", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of features to draw from X to train each base estimator ( without replacement by default, see `bootstrap_features` for more details). - If int, then draw `max_features` features. - If float, then draw `max_features * X.shape[1]` features." - }, - { - "name": "bootstrap", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether samples are drawn with replacement. If False, sampling without replacement is performed." - }, - { - "name": "bootstrap_features", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether features are drawn with replacement." - }, - { - "name": "oob_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use out-of-bag samples to estimate the generalization error." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new ensemble. See :term:`the Glossary `." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel for both :meth:`fit` and :meth:`predict`. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the random resampling of the original dataset (sample wise and feature wise). If the base estimator accepts a `random_state` attribute, a different seed is generated for each instance in the ensemble. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity when fitting and predicting." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrices are accepted only if they are supported by the base estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict regression target for X.\n\nThe predicted regression target of an input sample is computed as the\nmean predicted regression targets of the estimators in the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted values." - }, - { - "name": "_validate_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the estimator and set the base_estimator_ attribute." 
- }, - { - "name": "_set_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A Bagging regressor.\n\nA Bagging regressor is an ensemble meta-estimator that fits base\nregressors each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.15\n\nParameters\n----------\nbase_estimator : object, default=None\n The base estimator to fit on random subsets of the dataset.\n If None, then the base estimator is a\n :class:`~sklearn.tree.DecisionTreeRegressor`.\n\nn_estimators : int, default=10\n The number of base estimators in the ensemble.\n\nmax_samples : int or float, default=1.0\n The number of samples to draw from X to train each base estimator (with\n replacement by default, see `bootstrap` for more details).\n\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples.\n\nmax_features : int or float, default=1.0\n The number of features to draw from X to train each base estimator (\n without replacement by default, see `bootstrap_features` for more\n details).\n\n - If int, then draw `max_features` features.\n - If float, then draw `max_features * X.shape[1]` features.\n\nbootstrap : bool, default=True\n Whether samples are drawn with replacement. If False, sampling\n without replacement is performed.\n\nbootstrap_features : bool, default=False\n Whether features are drawn with replacement.\n\noob_score : bool, default=False\n Whether to use out-of-bag samples to estimate\n the generalization error.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit\n a whole new ensemble. See :term:`the Glossary `.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for both :meth:`fit` and\n :meth:`predict`. ``None`` means 1 unless in a\n :obj:`joblib.parallel_backend` context. ``-1`` means using all\n processors. 
See :term:`Glossary ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random resampling of the original dataset\n (sample wise and feature wise).\n If the base estimator accepts a `random_state` attribute, a different\n seed is generated for each instance in the ensemble.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nAttributes\n----------\nbase_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\nn_features_ : int\n The number of features when :meth:`fit` is performed.\n\nestimators_ : list of estimators\n The collection of fitted sub-estimators.\n\nestimators_samples_ : list of arrays\n The subset of drawn samples (i.e., the in-bag samples) for each base\n estimator. Each subset is defined by an array of the indices selected.\n\nestimators_features_ : list of arrays\n The subset of drawn features for each base estimator.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_prediction_ : ndarray of shape (n_samples,)\n Prediction computed with out-of-bag estimate on the training\n set. If n_estimators is small it might be possible that a data point\n was never left out during the bootstrap. In this case,\n `oob_prediction_` might contain NaN. This attribute exists only\n when ``oob_score`` is True.\n\nExamples\n--------\n>>> from sklearn.svm import SVR\n>>> from sklearn.ensemble import BaggingRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_samples=100, n_features=4,\n... n_informative=2, n_targets=1,\n... random_state=0, shuffle=False)\n>>> regr = BaggingRegressor(base_estimator=SVR(),\n... 
n_estimators=10, random_state=0).fit(X, y)\n>>> regr.predict([[0, 0, 0, 0]])\narray([-2.8720...])\n\nReferences\n----------\n\n.. [1] L. Breiman, \"Pasting small votes for classification in large\n databases and on-line\", Machine Learning, 36(1), 85-103, 1999.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning, 24(2), 123-140,\n 1996.\n\n.. [3] T. Ho, \"The random subspace method for constructing decision\n forests\", Pattern Analysis and Machine Intelligence, 20(8), 832-844,\n 1998.\n\n.. [4] G. Louppe and P. Geurts, \"Ensembles on Random Patches\", Machine\n Learning and Knowledge Discovery in Databases, 346-361, 2012." - } - ], - "functions": [ - { - "name": "_generate_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Draw randomly sampled indices." - }, - { - "name": "_generate_bagging_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Randomly draw feature and sample indices." - }, - { - "name": "_parallel_build_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to build a batch of estimators within a job." - }, - { - "name": "_parallel_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to compute (proba-)predictions within a job." - }, - { - "name": "_parallel_predict_log_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to compute log probabilities within a job." - }, - { - "name": "_parallel_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to compute decisions within a job." 
- }, - { - "name": "_parallel_predict_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to compute predictions within a job." - } - ] - }, - { - "name": "sklearn.ensemble._base", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numbers", - "from typing import List", - "import numpy as np", - "from joblib import effective_n_jobs", - "from base import clone", - "from base import is_classifier", - "from base import is_regressor", - "from base import BaseEstimator", - "from base import MetaEstimatorMixin", - "from utils import Bunch", - "from utils import _print_elapsed_time", - "from utils import check_random_state", - "from utils.metaestimators import _BaseComposition" - ], - "classes": [ - { - "name": "BaseEnsemble", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base estimator from which the ensemble is built." - }, - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The number of estimators in the ensemble." - }, - { - "name": "estimator_params", - "type": "List[str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The list of attributes to use as parameters when instantiating a new base estimator. If none are given, default parameters are used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the estimator and the n_estimator attribute.\n\nSets the base_estimator_` attributes." 
- }, - { - "name": "_make_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make and configure a copy of the `base_estimator_` attribute.\n\nWarning: This method should be used to properly instantiate new\nsub-estimators." - }, - { - "name": "__len__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the number of estimators in the ensemble." - }, - { - "name": "__getitem__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the index'th estimator in the ensemble." - }, - { - "name": "__iter__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return iterator over estimators in the ensemble." - } - ], - "docstring": "Base class for all ensemble classes.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.\n\nParameters\n----------\nbase_estimator : object\n The base estimator from which the ensemble is built.\n\nn_estimators : int, default=10\n The number of estimators in the ensemble.\n\nestimator_params : list of str, default=tuple()\n The list of attributes to use as parameters when instantiating a\n new base estimator. If none are given, default parameters are used.\n\nAttributes\n----------\nbase_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\nestimators_ : list of estimators\n The collection of fitted base estimators." - }, - { - "name": "_BaseHeterogeneousEnsemble", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimators", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ensemble of estimators to use in the ensemble. Each element of the list is defined as a tuple of string (i.e. 
name of the estimator) and an estimator instance. An estimator can be set to `'drop'` using `set_params`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "named_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [ - { - "name": "**params", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specific parameters using e.g. `set_params(parameter_name=new_value)`. In addition, to setting the parameters of the estimator, the individual estimator of the estimators can also be set, or can be removed by setting them to 'drop'." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set the parameters of an estimator from the ensemble.\n\nValid parameter keys can be listed with `get_params()`. Note that you\ncan directly set the parameters of the estimators contained in\n`estimators`.\n\nParameters\n----------\n**params : keyword arguments\n Specific parameters using e.g.\n `set_params(parameter_name=new_value)`. In addition, to setting the\n parameters of the estimator, the individual estimator of the\n estimators can also be set, or can be removed by setting them to\n 'drop'." - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Setting it to True gets the various estimators and the parameters of the estimators as well." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get the parameters of an estimator from the ensemble.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `estimators` parameter.\n\nParameters\n----------\ndeep : bool, default=True\n Setting it to True gets the various estimators and the parameters\n of the estimators as well." - } - ], - "docstring": "Base class for heterogeneous ensemble of learners.\n\nParameters\n----------\nestimators : list of (str, estimator) tuples\n The ensemble of estimators to use in the ensemble. Each element of the\n list is defined as a tuple of string (i.e. name of the estimator) and\n an estimator instance. An estimator can be set to `'drop'` using\n `set_params`.\n\nAttributes\n----------\nestimators_ : list of estimators\n The elements of the estimators parameter, having been fitted on the\n training data. If an estimator has been set to `'drop'`, it will not\n appear in `estimators_`." - } - ], - "functions": [ - { - "name": "_fit_single_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to fit an estimator within a job." - }, - { - "name": "_set_random_states", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator with potential randomness managed by random_state parameters." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the generation of the random integers. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set fixed random_state parameters for an estimator.\n\nFinds all parameters ending ``random_state`` and sets them to integers\nderived from ``random_state``.\n\nParameters\n----------\nestimator : estimator supporting get/set_params\n Estimator with potential randomness managed by random_state\n parameters.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the generation of the random\n integers. Pass an int for reproducible output across multiple function\n calls.\n See :term:`Glossary `.\n\nNotes\n-----\nThis does not necessarily set *all* ``random_state`` attributes that\ncontrol an estimator's randomness, only those accessible through\n``estimator.get_params()``. ``random_state``s not controlled include\nthose belonging to:\n\n * cross-validation splitters\n * ``scipy.stats`` rvs" - }, - { - "name": "_partition_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to partition estimators between jobs." 
- } - ] - }, - { - "name": "sklearn.ensemble._forest", - "imports": [ - "import numbers", - "from warnings import catch_warnings", - "from warnings import simplefilter", - "from warnings import warn", - "import threading", - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numpy as np", - "from scipy.sparse import issparse", - "from scipy.sparse import hstack as sparse_hstack", - "from joblib import Parallel", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import MultiOutputMixin", - "from metrics import r2_score", - "from preprocessing import OneHotEncoder", - "from tree import DecisionTreeClassifier", - "from tree import DecisionTreeRegressor", - "from tree import ExtraTreeClassifier", - "from tree import ExtraTreeRegressor", - "from tree._tree import DTYPE", - "from tree._tree import DOUBLE", - "from utils import check_random_state", - "from utils import check_array", - "from utils import compute_sample_weight", - "from exceptions import DataConversionWarning", - "from _base import BaseEnsemble", - "from _base import _partition_estimators", - "from utils.fixes import delayed", - "from utils.fixes import _joblib_parallel_args", - "from utils.multiclass import check_classification_targets", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "BaseForest", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "apply", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, its dtype will be converted to ``dtype=np.float32``. 
If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply trees in the forest to X, return leaf indices.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\nX_leaves : ndarray of shape (n_samples, n_estimators)\n For each datapoint x in X and for each tree in the forest,\n return the index of the leaf x ends up in." - }, - { - "name": "decision_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, its dtype will be converted to ``dtype=np.float32``. If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the decision path in the forest.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\nindicator : sparse matrix of shape (n_samples, n_nodes)\n Return a node indicator matrix where non zero elements indicates\n that the samples goes through the nodes. The matrix is of CSR\n format.\n\nn_nodes_ptr : ndarray of shape (n_estimators + 1,)\n The columns from indicator[n_nodes_ptr[i]:n_nodes_ptr[i+1]]\n gives the indicator value for the i-th estimator." 
- }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Internally, its dtype will be converted to ``dtype=np.float32``. If a sparse matrix is provided, it will be converted into a sparse ``csc_matrix``." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels in classification, real numbers in regression)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Splits that would create child nodes with net zero or negative weight are ignored while searching for a split in each node. In the case of classification, splits are also ignored if they would result in any single class carrying a negative weight in either child node." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a forest of trees from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Internally, its dtype will be converted\n to ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csc_matrix``.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels in classification, real numbers in\n regression).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. 
In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\nReturns\n-------\nself : object" - }, - { - "name": "_set_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate out of bag predictions and score." - }, - { - "name": "_validate_y_class_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_X_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate X whenever one tries to predict, apply, predict_proba." - }, - { - "name": "feature_importances_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The impurity-based feature importances.\n\nThe higher, the more important the feature.\nThe importance of a feature is computed as the (normalized)\ntotal reduction of the criterion brought by that feature. It is also\nknown as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). See\n:func:`sklearn.inspection.permutation_importance` as an alternative.\n\nReturns\n-------\nfeature_importances_ : ndarray of shape (n_features,)\n The values of this array sum to 1, unless all trees are single node\n trees consisting of only the root node, in which case it will be an\n array of zeros." - } - ], - "docstring": "Base class for forests of trees.\n\nWarning: This class should not be used directly. Use derived classes\ninstead." 
- }, - { - "name": "ForestClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute out-of-bag score." - }, - { - "name": "_validate_y_class_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, its dtype will be converted to ``dtype=np.float32``. If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class for X.\n\nThe predicted class of an input sample is a vote by the trees in\nthe forest, weighted by their probability estimates. That is,\nthe predicted class is the one with highest mean probability\nestimate across the trees.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\ny : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The predicted classes." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, its dtype will be converted to ``dtype=np.float32``. 
If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample are computed as\nthe mean predicted class probabilities of the trees in the forest.\nThe class probability of a single tree is the fraction of samples of\nthe same class in a leaf.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes), or a list of n_outputs\n such arrays if n_outputs > 1.\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, its dtype will be converted to ``dtype=np.float32``. If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class log-probabilities for X.\n\nThe predicted class log-probabilities of an input sample is computed as\nthe log of the mean predicted class probabilities of the trees in the\nforest.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. 
If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes), or a list of n_outputs\n such arrays if n_outputs > 1.\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - } - ], - "docstring": "Base class for forest of trees-based classifiers.\n\nWarning: This class should not be used directly. Use derived classes\ninstead." - }, - { - "name": "ForestRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, its dtype will be converted to ``dtype=np.float32``. If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict regression target for X.\n\nThe predicted regression target of an input sample is computed as the\nmean predicted regression targets of the trees in the forest.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\ny : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The predicted values." - }, - { - "name": "_set_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute out-of-bag scores." 
- }, - { - "name": "_compute_partial_dependence_recursion", - "decorators": [], - "parameters": [ - { - "name": "grid", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The grid points on which the partial dependence should be evaluated." - }, - { - "name": "target_features", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of target features for which the partial dependence should be evaluated." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fast partial dependence computation.\n\nParameters\n----------\ngrid : ndarray of shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\ntarget_features : ndarray of shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\n\nReturns\n-------\naveraged_predictions : ndarray of shape (n_samples,)\n The value of the partial dependence function on each grid point." - } - ], - "docstring": "Base class for forest of trees-based regressors.\n\nWarning: This class should not be used directly. Use derived classes\ninstead." - }, - { - "name": "RandomForestClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of trees in the forest. .. versionchanged:: 0.22 The default value of ``n_estimators`` changed from 10 to 100 in 0.22." - }, - { - "name": "criterion", - "type": "Literal[\"gini\", \"entropy\"]", - "hasDefault": true, - "default": "\"gini\"", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. 
Supported criteria are \"gini\" for the Gini impurity and \"entropy\" for the information gain. Note: this parameter is tree-specific." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. 
Samples have equal weight when sample_weight is not provided." - }, - { - "name": "max_features", - "type": "Literal[\"auto\", \"sqrt\", \"log2\"]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `round(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=sqrt(n_features)`. - If \"sqrt\", then `max_features=sqrt(n_features)` (same as \"auto\"). - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. 
versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "bootstrap", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree." - }, - { - "name": "oob_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use out-of-bag samples to estimate the generalization accuracy." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`, :meth:`decision_path` and :meth:`apply` are all parallelized over the trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls both the randomness of the bootstrapping of the samples used when building trees (if ``bootstrap=True``) and the sampling of the features to consider when looking for the best split at each node (if ``max_features < n_features``). See :term:`Glossary ` for details." 
- }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity when fitting and predicting." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`the Glossary `." - }, - { - "name": "class_weight", - "type": "Literal[\"balanced\", \"balanced_subsample\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. Note that for multioutput (including multilabel) weights should be defined for each class of every column in its own dict. For example, for four-class multilabel classification weights should be [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of [{1:1}, {2:5}, {3:1}, {4:1}]. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` The \"balanced_subsample\" mode is the same as \"balanced\" except that weights are computed based on the bootstrap sample for every tree grown. For multi-output, the weights of each column of y will be multiplied. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified." - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. 
The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. versionadded:: 0.22" - }, - { - "name": "max_samples", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If bootstrap is True, the number of samples to draw from X to train each base estimator. - If None (default), then draw `X.shape[0]` samples. - If int, then draw `max_samples` samples. - If float, then draw `max_samples * X.shape[0]` samples. Thus, `max_samples` should be in the interval `(0, 1)`. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A random forest classifier.\n\nA random forest is a meta estimator that fits a number of decision tree\nclassifiers on various sub-samples of the dataset and uses averaging to\nimprove the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_estimators : int, default=100\n The number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\ncriterion : {\"gini\", \"entropy\"}, default=\"gini\"\n The function to measure the quality of a split. Supported criteria are\n \"gini\" for the Gini impurity and \"entropy\" for the information gain.\n Note: this parameter is tree-specific.\n\nmax_depth : int, default=None\n The maximum depth of the tree. 
If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. 
Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : {\"auto\", \"sqrt\", \"log2\"}, int or float, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)` (same as \"auto\").\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. 
The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nbootstrap : bool, default=True\n Whether bootstrap samples are used when building trees. If False, the\n whole dataset is used to build each tree.\n\noob_score : bool, default=False\n Whether to use out-of-bag samples to estimate\n the generalization accuracy.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls both the randomness of the bootstrapping of the samples used\n when building trees (if ``bootstrap=True``) and the sampling of the\n features to consider when looking for the best split at each node\n (if ``max_features < n_features``).\n See :term:`Glossary ` for details.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. See :term:`the Glossary `.\n\nclass_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts, default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. 
For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n The \"balanced_subsample\" mode is the same as \"balanced\" except that\n weights are computed based on the bootstrap sample for every tree\n grown.\n\n For multi-output, the weights of each column of y will be multiplied.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n If bootstrap is True, the number of samples to draw from X\n to train each base estimator.\n\n - If None (default), then draw `X.shape[0]` samples.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n .. 
versionadded:: 0.22\n\nAttributes\n----------\nbase_estimator_ : DecisionTreeClassifier\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\nestimators_ : list of DecisionTreeClassifier\n The collection of fitted sub-estimators.\n\nclasses_ : ndarray of shape (n_classes,) or a list of such arrays\n The classes labels (single output problem), or a list of arrays of\n class labels (multi-output problem).\n\nn_classes_ : int or list\n The number of classes (single output problem), or a list containing the\n number of classes for each output (multi-output problem).\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_decision_function_ : ndarray of shape (n_samples, n_classes)\n Decision function computed with out-of-bag estimate on the training\n set. If n_estimators is small it might be possible that a data point\n was never left out during the bootstrap. In this case,\n `oob_decision_function_` might contain NaN. This attribute exists\n only when ``oob_score`` is True.\n\nSee Also\n--------\nDecisionTreeClassifier, ExtraTreesClassifier\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) 
lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data,\n``max_features=n_features`` and ``bootstrap=False``, if the improvement\nof the criterion is identical for several splits enumerated during the\nsearch of the best split. To obtain a deterministic behaviour during\nfitting, ``random_state`` has to be fixed.\n\nReferences\n----------\n.. [1] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n\nExamples\n--------\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=1000, n_features=4,\n... n_informative=2, n_redundant=0,\n... random_state=0, shuffle=False)\n>>> clf = RandomForestClassifier(max_depth=2, random_state=0)\n>>> clf.fit(X, y)\nRandomForestClassifier(...)\n>>> print(clf.predict([[0, 0, 0, 0]]))\n[1]" - }, - { - "name": "RandomForestRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of trees in the forest. .. versionchanged:: 0.22 The default value of ``n_estimators`` changed from 10 to 100 in 0.22." - }, - { - "name": "criterion", - "type": "Literal[\"mse\", \"mae\"]", - "hasDefault": true, - "default": "\"mse\"", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are \"mse\" for the mean squared error, which is equal to variance reduction as feature selection criterion, and \"mae\" for the mean absolute error. .. 
versionadded:: 0.18 Mean Absolute Error (MAE) criterion." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." 
- }, - { - "name": "max_features", - "type": "Literal[\"auto\", \"sqrt\", \"log2\"]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `round(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=n_features`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. 
versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "bootstrap", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree." - }, - { - "name": "oob_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "whether to use out-of-bag samples to estimate the R^2 on unseen data." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`, :meth:`decision_path` and :meth:`apply` are all parallelized over the trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls both the randomness of the bootstrapping of the samples used when building trees (if ``bootstrap=True``) and the sampling of the features to consider when looking for the best split at each node (if ``max_features < n_features``). See :term:`Glossary ` for details." 
- }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity when fitting and predicting." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`the Glossary `." - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. versionadded:: 0.22" - }, - { - "name": "max_samples", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If bootstrap is True, the number of samples to draw from X to train each base estimator. - If None (default), then draw `X.shape[0]` samples. - If int, then draw `max_samples` samples. - If float, then draw `max_samples * X.shape[0]` samples. Thus, `max_samples` should be in the interval `(0, 1)`. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A random forest regressor.\n\nA random forest is a meta estimator that fits a number of classifying\ndecision trees on various sub-samples of the dataset and uses averaging\nto improve the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_estimators : int, default=100\n The number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\ncriterion : {\"mse\", \"mae\"}, default=\"mse\"\n The function to measure the quality of a split. Supported criteria\n are \"mse\" for the mean squared error, which is equal to variance\n reduction as feature selection criterion, and \"mae\" for the mean\n absolute error.\n\n .. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. 
This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : {\"auto\", \"sqrt\", \"log2\"}, int or float, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n 
left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nbootstrap : bool, default=True\n Whether bootstrap samples are used when building trees. If False, the\n whole dataset is used to build each tree.\n\noob_score : bool, default=False\n whether to use out-of-bag samples to estimate\n the R^2 on unseen data.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls both the randomness of the bootstrapping of the samples used\n when building trees (if ``bootstrap=True``) and the sampling of the\n features to consider when looking for the best split at each node\n (if ``max_features < n_features``).\n See :term:`Glossary ` for details.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. 
See :term:`the Glossary `.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n If bootstrap is True, the number of samples to draw from X\n to train each base estimator.\n\n - If None (default), then draw `X.shape[0]` samples.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nbase_estimator_ : DecisionTreeRegressor\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\nestimators_ : list of DecisionTreeRegressor\n The collection of fitted sub-estimators.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). 
See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_prediction_ : ndarray of shape (n_samples,)\n Prediction computed with out-of-bag estimate on the training set.\n This attribute exists only when ``oob_score`` is True.\n\nSee Also\n--------\nDecisionTreeRegressor, ExtraTreesRegressor\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data,\n``max_features=n_features`` and ``bootstrap=False``, if the improvement\nof the criterion is identical for several splits enumerated during the\nsearch of the best split. To obtain a deterministic behaviour during\nfitting, ``random_state`` has to be fixed.\n\nThe default value ``max_features=\"auto\"`` uses ``n_features``\nrather than ``n_features / 3``. The latter was originally suggested in\n[1], whereas the former was more recently justified empirically in [2].\n\nReferences\n----------\n.. [1] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n\n.. [2] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized\n trees\", Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.ensemble import RandomForestRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=4, n_informative=2,\n... 
random_state=0, shuffle=False)\n>>> regr = RandomForestRegressor(max_depth=2, random_state=0)\n>>> regr.fit(X, y)\nRandomForestRegressor(...)\n>>> print(regr.predict([[0, 0, 0, 0]]))\n[-8.32987858]" - }, - { - "name": "ExtraTreesClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of trees in the forest. .. versionchanged:: 0.22 The default value of ``n_estimators`` changed from 10 to 100 in 0.22." - }, - { - "name": "criterion", - "type": "Literal[\"gini\", \"entropy\"]", - "hasDefault": true, - "default": "\"gini\"", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are \"gini\" for the Gini impurity and \"entropy\" for the information gain." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." 
- }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." - }, - { - "name": "max_features", - "type": "Literal[\"auto\", \"sqrt\", \"log2\"]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `round(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=sqrt(n_features)`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." 
- }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "bootstrap", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree." 
- }, - { - "name": "oob_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use out-of-bag samples to estimate the generalization accuracy." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`, :meth:`decision_path` and :meth:`apply` are all parallelized over the trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls 3 sources of randomness: - the bootstrapping of the samples used when building trees (if ``bootstrap=True``) - the sampling of the features to consider when looking for the best split at each node (if ``max_features < n_features``) - the draw of the splits for each of the `max_features` See :term:`Glossary ` for details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity when fitting and predicting." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`the Glossary `." - }, - { - "name": "class_weight", - "type": "Literal[\"balanced\", \"balanced_subsample\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. 
If not given, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. Note that for multioutput (including multilabel) weights should be defined for each class of every column in its own dict. For example, for four-class multilabel classification weights should be [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of [{1:1}, {2:5}, {3:1}, {4:1}]. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` The \"balanced_subsample\" mode is the same as \"balanced\" except that weights are computed based on the bootstrap sample for every tree grown. For multi-output, the weights of each column of y will be multiplied. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified." - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. versionadded:: 0.22" - }, - { - "name": "max_samples", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If bootstrap is True, the number of samples to draw from X to train each base estimator. - If None (default), then draw `X.shape[0]` samples. - If int, then draw `max_samples` samples. - If float, then draw `max_samples * X.shape[0]` samples. Thus, `max_samples` should be in the interval `(0, 1)`. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "An extra-trees classifier.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_estimators : int, default=100\n The number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\ncriterion : {\"gini\", \"entropy\"}, default=\"gini\"\n The function to measure the quality of a split. Supported criteria are\n \"gini\" for the Gini impurity and \"entropy\" for the information gain.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. 
versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : {\"auto\", \"sqrt\", \"log2\"}, int or float, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. 
versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nbootstrap : bool, default=False\n Whether bootstrap samples are used when building trees. If False, the\n whole dataset is used to build each tree.\n\noob_score : bool, default=False\n Whether to use out-of-bag samples to estimate\n the generalization accuracy.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls 3 sources of randomness:\n\n - the bootstrapping of the samples used when building trees\n (if ``bootstrap=True``)\n - the sampling of the features to consider when looking for the best\n split at each node (if ``max_features < n_features``)\n - the draw of the splits for each of the `max_features`\n\n See :term:`Glossary ` for details.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. 
See :term:`the Glossary `.\n\nclass_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts, default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n The \"balanced_subsample\" mode is the same as \"balanced\" except that\n weights are computed based on the bootstrap sample for every tree\n grown.\n\n For multi-output, the weights of each column of y will be multiplied.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n If bootstrap is True, the number of samples to draw from X\n to train each base estimator.\n\n - If None (default), then draw `X.shape[0]` samples.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n .. 
versionadded:: 0.22\n\nAttributes\n----------\nbase_estimator_ : ExtraTreesClassifier\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\nestimators_ : list of DecisionTreeClassifier\n The collection of fitted sub-estimators.\n\nclasses_ : ndarray of shape (n_classes,) or a list of such arrays\n The classes labels (single output problem), or a list of arrays of\n class labels (multi-output problem).\n\nn_classes_ : int or list\n The number of classes (single output problem), or a list containing the\n number of classes for each output (multi-output problem).\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_decision_function_ : ndarray of shape (n_samples, n_classes)\n Decision function computed with out-of-bag estimate on the training\n set. If n_estimators is small it might be possible that a data point\n was never left out during the bootstrap. In this case,\n `oob_decision_function_` might contain NaN. 
This attribute exists\n only when ``oob_score`` is True.\n\nSee Also\n--------\nsklearn.tree.ExtraTreeClassifier : Base classifier for this ensemble.\nRandomForestClassifier : Ensemble Classifier based on trees with optimal\n splits.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized\n trees\", Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.ensemble import ExtraTreesClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_features=4, random_state=0)\n>>> clf = ExtraTreesClassifier(n_estimators=100, random_state=0)\n>>> clf.fit(X, y)\nExtraTreesClassifier(random_state=0)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])" - }, - { - "name": "ExtraTreesRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of trees in the forest. .. versionchanged:: 0.22 The default value of ``n_estimators`` changed from 10 to 100 in 0.22." - }, - { - "name": "criterion", - "type": "Literal[\"mse\", \"mae\"]", - "hasDefault": true, - "default": "\"mse\"", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are \"mse\" for the mean squared error, which is equal to variance reduction as feature selection criterion, and \"mae\" for the mean absolute error. .. versionadded:: 0.18 Mean Absolute Error (MAE) criterion." 
- }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." 
- }, - { - "name": "max_features", - "type": "Literal[\"auto\", \"sqrt\", \"log2\"]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `round(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=n_features`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. 
versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "bootstrap", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree." - }, - { - "name": "oob_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use out-of-bag samples to estimate the R^2 on unseen data." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`, :meth:`decision_path` and :meth:`apply` are all parallelized over the trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls 3 sources of randomness: - the bootstrapping of the samples used when building trees (if ``bootstrap=True``) - the sampling of the features to consider when looking for the best split at each node (if ``max_features < n_features``) - the draw of the splits for each of the `max_features` See :term:`Glossary ` for details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity when fitting and predicting." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`the Glossary `." - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. versionadded:: 0.22" - }, - { - "name": "max_samples", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If bootstrap is True, the number of samples to draw from X to train each base estimator. - If None (default), then draw `X.shape[0]` samples. - If int, then draw `max_samples` samples. - If float, then draw `max_samples * X.shape[0]` samples. Thus, `max_samples` should be in the interval `(0, 1)`. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "An extra-trees regressor.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_estimators : int, default=100\n The number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\ncriterion : {\"mse\", \"mae\"}, default=\"mse\"\n The function to measure the quality of a split. Supported criteria\n are \"mse\" for the mean squared error, which is equal to variance\n reduction as feature selection criterion, and \"mae\" for the mean\n absolute error.\n\n .. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. 
This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : {\"auto\", \"sqrt\", \"log2\"}, int or float, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n 
left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nbootstrap : bool, default=False\n Whether bootstrap samples are used when building trees. If False, the\n whole dataset is used to build each tree.\n\noob_score : bool, default=False\n Whether to use out-of-bag samples to estimate the R^2 on unseen data.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls 3 sources of randomness:\n\n - the bootstrapping of the samples used when building trees\n (if ``bootstrap=True``)\n - the sampling of the features to consider when looking for the best\n split at each node (if ``max_features < n_features``)\n - the draw of the splits for each of the `max_features`\n\n See :term:`Glossary ` for details.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. 
See :term:`the Glossary `.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n If bootstrap is True, the number of samples to draw from X\n to train each base estimator.\n\n - If None (default), then draw `X.shape[0]` samples.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nbase_estimator_ : ExtraTreeRegressor\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\nestimators_ : list of DecisionTreeRegressor\n The collection of fitted sub-estimators.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). 
See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_ : int\n The number of features.\n\nn_outputs_ : int\n The number of outputs.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_prediction_ : ndarray of shape (n_samples,)\n Prediction computed with out-of-bag estimate on the training set.\n This attribute exists only when ``oob_score`` is True.\n\nSee Also\n--------\nsklearn.tree.ExtraTreeRegressor : Base estimator for this ensemble.\nRandomForestRegressor : Ensemble regressor using trees with optimal splits.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.ensemble import ExtraTreesRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, random_state=0)\n>>> reg = ExtraTreesRegressor(n_estimators=100, random_state=0).fit(\n... X_train, y_train)\n>>> reg.score(X_test, y_test)\n0.2708..." - }, - { - "name": "RandomTreesEmbedding", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of trees in the forest. .. 
versionchanged:: 0.22 The default value of ``n_estimators`` changed from 10 to 100 in 0.22." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of each tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` is the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` is the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." 
- }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "sparse_output", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return a sparse CSR matrix, as default behavior, or to return a dense array compatible with dense pipeline operators." 
- }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel. :meth:`fit`, :meth:`transform`, :meth:`decision_path` and :meth:`apply` are all parallelized over the trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the generation of the random `y` used to fit the trees and the draw of the splits for each feature at the trees' nodes. See :term:`Glossary ` for details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity when fitting and predicting." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`the Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Use ``dtype=np.float32`` for maximum efficiency. Sparse matrices are also supported, use sparse ``csc_matrix`` for maximum efficiency." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Splits that would create child nodes with net zero or negative weight are ignored while searching for a split in each node. In the case of classification, splits are also ignored if they would result in any single class carrying a negative weight in either child node." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Use ``dtype=np.float32`` for maximum\n efficiency. Sparse matrices are also supported, use sparse\n ``csc_matrix`` for maximum efficiency.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\nReturns\n-------\nself : object" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data used to build forests. Use ``dtype=np.float32`` for maximum efficiency." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Splits that would create child nodes with net zero or negative weight are ignored while searching for a split in each node. In the case of classification, splits are also ignored if they would result in any single class carrying a negative weight in either child node." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit estimator and transform dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data used to build forests. Use ``dtype=np.float32`` for\n maximum efficiency.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\nReturns\n-------\nX_transformed : sparse matrix of shape (n_samples, n_out)\n Transformed dataset." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data to be transformed. Use ``dtype=np.float32`` for maximum efficiency. Sparse matrices are also supported, use sparse ``csr_matrix`` for maximum efficiency." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data to be transformed. Use ``dtype=np.float32`` for maximum\n efficiency. Sparse matrices are also supported, use sparse\n ``csr_matrix`` for maximum efficiency.\n\nReturns\n-------\nX_transformed : sparse matrix of shape (n_samples, n_out)\n Transformed dataset." - } - ], - "docstring": "An ensemble of totally random trees.\n\nAn unsupervised transformation of a dataset to a high-dimensional\nsparse representation. A datapoint is coded according to which leaf of\neach tree it is sorted into. Using a one-hot encoding of the leaves,\nthis leads to a binary coding with as many ones as there are trees in\nthe forest.\n\nThe dimensionality of the resulting representation is\n``n_out <= n_estimators * max_leaf_nodes``. If ``max_leaf_nodes == None``,\nthe number of leaf nodes is at most ``n_estimators * 2 ** max_depth``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_estimators : int, default=100\n Number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\nmax_depth : int, default=5\n The maximum depth of each tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` is the minimum\n number of samples for each split.\n\n .. 
versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` is the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. 
deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nsparse_output : bool, default=True\n Whether or not to return a sparse CSR matrix, as default behavior,\n or to return a dense array compatible with dense pipeline operators.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`transform`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the generation of the random `y` used to fit the trees\n and the draw of the splits for each feature at the trees' nodes.\n See :term:`Glossary ` for details.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. 
See :term:`the Glossary `.\n\nAttributes\n----------\nbase_estimator_ : DecisionTreeClassifier instance\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\nestimators_ : list of DecisionTreeClassifier instances\n The collection of fitted sub-estimators.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The feature importances (the higher, the more important the feature).\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\none_hot_encoder_ : OneHotEncoder instance\n One-hot encoder used to create the sparse embedding.\n\nReferences\n----------\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n Machine Learning, 63(1), 3-42, 2006.\n.. [2] Moosmann, F. and Triggs, B. and Jurie, F. \"Fast discriminative\n visual codebooks using randomized clustering forests\"\n NIPS 2007\n\nExamples\n--------\n>>> from sklearn.ensemble import RandomTreesEmbedding\n>>> X = [[0,0], [1,0], [0,1], [-1,0], [0,-1]]\n>>> random_trees = RandomTreesEmbedding(\n... n_estimators=5, random_state=0, max_depth=1).fit(X)\n>>> X_sparse_embedding = random_trees.transform(X)\n>>> X_sparse_embedding.toarray()\narray([[0., 1., 1., 0., 1., 0., 0., 1., 1., 0.],\n [0., 1., 1., 0., 1., 0., 0., 1., 1., 0.],\n [0., 1., 0., 1., 0., 1., 0., 1., 0., 1.],\n [1., 0., 1., 0., 1., 0., 1., 0., 1., 0.],\n [0., 1., 1., 0., 1., 0., 0., 1., 1., 0.]])" - } - ], - "functions": [ - { - "name": "_get_n_samples_bootstrap", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples in the dataset." 
- }, - { - "name": "max_samples", - "type": "Union[float, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of samples to draw from the total available: - if float, this indicates a fraction of the total and should be the interval `(0, 1)`; - if int, this indicates the exact number of samples; - if None, this indicates the total number of samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get the number of samples in a bootstrap sample.\n\nParameters\n----------\nn_samples : int\n Number of samples in the dataset.\nmax_samples : int or float\n The maximum number of samples to draw from the total available:\n - if float, this indicates a fraction of the total and should be\n the interval `(0, 1)`;\n - if int, this indicates the exact number of samples;\n - if None, this indicates the total number of samples.\n\nReturns\n-------\nn_samples_bootstrap : int\n The total number of samples to draw for the bootstrap sample." - }, - { - "name": "_generate_sample_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to _parallel_build_trees function." - }, - { - "name": "_generate_unsampled_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to forest._set_oob_score function." - }, - { - "name": "_parallel_build_trees", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to fit a single tree in parallel." 
- }, - { - "name": "_accumulate_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "This is a utility function for joblib's Parallel.\n\nIt can't go locally in ForestClassifier or ForestRegressor, because joblib\ncomplains that it cannot pickle it when placed there." - } - ] - }, - { - "name": "sklearn.ensemble._gb", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "import warnings", - "from _base import BaseEnsemble", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import BaseEstimator", - "from base import is_classifier", - "from utils import deprecated", - "from _gradient_boosting import predict_stages", - "from _gradient_boosting import predict_stage", - "from _gradient_boosting import _random_sample_mask", - "import numbers", - "import numpy as np", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import issparse", - "from time import time", - "from model_selection import train_test_split", - "from tree import DecisionTreeRegressor", - "from tree._tree import DTYPE", - "from tree._tree import DOUBLE", - "from None import _gb_losses", - "from utils import check_random_state", - "from utils import check_array", - "from utils import column_or_1d", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.multiclass import check_classification_targets", - "from exceptions import NotFittedError", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "VerboseReporter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level. 
If ``verbose==1`` output is printed once in a while (when iteration mod verbose_mod is zero).; if larger than 1 then output is printed for each update." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "init", - "decorators": [], - "parameters": [ - { - "name": "est", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator" - }, - { - "name": "begin_at_stage", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "stage at which to begin reporting" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialize reporter\n\nParameters\n----------\nest : Estimator\n The estimator\n\nbegin_at_stage : int, default=0\n stage at which to begin reporting" - }, - { - "name": "update", - "decorators": [], - "parameters": [ - { - "name": "j", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The new iteration." - }, - { - "name": "est", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update reporter with new iteration.\n\nParameters\n----------\nj : int\n The new iteration.\nest : Estimator\n The estimator." - } - ], - "docstring": "Reports verbose output to stdout.\n\nParameters\n----------\nverbose : int\n Verbosity level. If ``verbose==1`` output is printed once in a while\n (when iteration mod verbose_mod is zero).; if larger than 1 then output\n is printed for each update." 
- }, - { - "name": "BaseGradientBoosting", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Called by fit to validate y." - }, - { - "name": "_fit_stage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit another stage of ``_n_classes`` trees to the boosting model." - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check validity of parameters and raise ValueError if not valid." - }, - { - "name": "_init_state", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialize model state and allocate model state data structures. " - }, - { - "name": "_clear_state", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Clear the state of the gradient boosting model. " - }, - { - "name": "_resize_state", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Add additional ``n_estimators`` entries to all attributes." - }, - { - "name": "_is_initialized", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_initialized", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that the estimator is initialized, raising an error if not." 
- }, - { - "name": "_warn_mae_for_criterion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values (strings or integers in classification, real numbers in regression) For classification, labels must correspond to classes." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Splits that would create child nodes with net zero or negative weight are ignored while searching for a split in each node. In the case of classification, splits are also ignored if they would result in any single class carrying a negative weight in either child node." - }, - { - "name": "monitor", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The monitor is called after each iteration with the current iteration, a reference to the estimator and the local variables of ``_fit_stages`` as keyword arguments ``callable(i, self, locals())``. If the callable returns ``True`` the fitting procedure is stopped. The monitor can be used for various things such as computing held-out estimates, early stopping, model introspect, and snapshoting." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the gradient boosting model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\ny : array-like of shape (n_samples,)\n Target values (strings or integers in classification, real numbers\n in regression)\n For classification, labels must correspond to classes.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\nmonitor : callable, default=None\n The monitor is called after each iteration with the current\n iteration, a reference to the estimator and the local variables of\n ``_fit_stages`` as keyword arguments ``callable(i, self,\n locals())``. If the callable returns ``True`` the fitting procedure\n is stopped. The monitor can be used for various things such as\n computing held-out estimates, early stopping, model introspect, and\n snapshoting.\n\nReturns\n-------\nself : object" - }, - { - "name": "_fit_stages", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Iteratively fits the stages.\n\nFor each stage it computes the progress (OOB, train score)\nand delegates to ``_fit_stage``.\nReturns the number of stages fit; might differ from ``n_estimators``\ndue to early stopping." 
- }, - { - "name": "_make_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_raw_predict_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check input and compute raw predictions of the init estimator." - }, - { - "name": "_raw_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the sum of the trees raw predictions (+ init estimator)." - }, - { - "name": "_staged_raw_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute raw predictions of ``X`` for each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nraw_predictions : generator of ndarray of shape (n_samples, k)\n The raw predictions of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n Regression and binary classification are special cases with\n ``k == 1``, otherwise ``k==n_classes``." 
- }, - { - "name": "feature_importances_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The impurity-based feature importances.\n\nThe higher, the more important the feature.\nThe importance of a feature is computed as the (normalized)\ntotal reduction of the criterion brought by that feature. It is also\nknown as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). See\n:func:`sklearn.inspection.permutation_importance` as an alternative.\n\nReturns\n-------\nfeature_importances_ : ndarray of shape (n_features,)\n The values of this array sum to 1, unless all trees are single node\n trees consisting of only the root node, in which case it will be an\n array of zeros." - }, - { - "name": "_compute_partial_dependence_recursion", - "decorators": [], - "parameters": [ - { - "name": "grid", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The grid points on which the partial dependence should be evaluated." - }, - { - "name": "target_features", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of target features for which the partial dependence should be evaluated." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fast partial dependence computation.\n\nParameters\n----------\ngrid : ndarray of shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\ntarget_features : ndarray of shape (n_target_features,)\n The set of target features for which the partial dependence\n should be evaluated.\n\nReturns\n-------\naveraged_predictions : ndarray of shape (n_trees_per_iteration, n_samples)\n The value of the partial dependence function on each grid point." 
- }, - { - "name": "apply", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, its dtype will be converted to ``dtype=np.float32``. If a sparse matrix is provided, it will be converted to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply trees in the ensemble to X, return leaf indices.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will\n be converted to a sparse ``csr_matrix``.\n\nReturns\n-------\nX_leaves : array-like of shape (n_samples, n_estimators, n_classes)\n For each datapoint x in X and for each tree in the ensemble,\n return the index of the leaf x ends up in each estimator.\n In the case of binary classification n_classes is 1." - } - ], - "docstring": "Abstract base class for Gradient Boosting." - }, - { - "name": "GradientBoostingClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "loss", - "type": "Literal['deviance', 'exponential']", - "hasDefault": true, - "default": "'deviance'", - "limitation": null, - "ignored": false, - "docstring": "The loss function to be optimized. 'deviance' refers to deviance (= logistic regression) for classification with probabilistic outputs. For loss 'exponential' gradient boosting recovers the AdaBoost algorithm." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Learning rate shrinks the contribution of each tree by `learning_rate`. There is a trade-off between learning_rate and n_estimators." 
- }, - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance." - }, - { - "name": "subsample", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. `subsample` interacts with the parameter `n_estimators`. Choosing `subsample < 1.0` leads to a reduction of variance and an increase in bias." - }, - { - "name": "criterion", - "type": "Literal['friedman_mse', 'mse', 'mae']", - "hasDefault": true, - "default": "'friedman_mse'", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are 'friedman_mse' for the mean squared error with improvement score by Friedman, 'mse' for mean squared error, and 'mae' for the mean absolute error. The default value of 'friedman_mse' is generally the best as it can provide a better approximation in some cases. .. versionadded:: 0.18 .. deprecated:: 0.24 `criterion='mae'` is deprecated and will be removed in version 1.1 (renaming of 0.26). Use `criterion='friedman_mse'` or `'mse'` instead, as trees should use a least-square criterion in Gradient Boosting." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. 
versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the individual regression estimators. The maximum depth limits the number of nodes in the tree. Tune this parameter for best performance; the best value depends on the interaction of the input variables." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. 
The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "init", - "type": "Literal['zero']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator object that is used to compute the initial predictions. ``init`` has to provide :meth:`fit` and :meth:`predict_proba`. If 'zero', the initial raw predictions are set to zero. By default, a ``DummyEstimator`` predicting the classes priors is used." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the random seed given to each Tree estimator at each boosting iteration. In addition, it controls the random permutation of the features at each split (see Notes for more details). It also controls the random spliting of the training data to obtain a validation set if `n_iter_no_change` is not None. 
Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "max_features", - "type": "Literal['auto', 'sqrt', 'log2']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `int(max_features * n_features)` features are considered at each split. - If 'auto', then `max_features=sqrt(n_features)`. - If 'sqrt', then `max_features=sqrt(n_features)`. - If 'log2', then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Choosing `max_features < n_features` leads to a reduction of variance and an increase in bias. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. If 1 then it prints progress and performance once in a while (the more trees the lower the frequency). If greater than 1 then it prints progress and performance for every tree." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just erase the previous solution. 
See :term:`the Glossary `." - }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if ``n_iter_no_change`` is set to an integer. .. versionadded:: 0.20" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "``n_iter_no_change`` is used to decide if early stopping will be used to terminate training when validation score is not improving. By default it is set to None to disable early stopping. If set to a number, it will set aside ``validation_fraction`` size of the training data as validation and terminate training when validation score is not improving in all of the previous ``n_iter_no_change`` numbers of iterations. The split is stratified. .. versionadded:: 0.20" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for the early stopping. When the loss is not improving by at least tol for ``n_iter_no_change`` iterations (if set to a number), the training stops. .. versionadded:: 0.20" - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_warn_mae_for_criterion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the decision function of ``X``.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nscore : ndarray of shape (n_samples, n_classes) or (n_samples,)\n The decision function of the input samples, which corresponds to\n the raw values predicted from the trees of the ensemble . The\n order of the classes corresponds to that in the attribute\n :term:`classes_`. Regression and binary classification produce an\n array of shape (n_samples,)." - }, - { - "name": "staged_decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute decision function of ``X`` for each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nscore : generator of ndarray of shape (n_samples, k)\n The decision function of the input samples, which corresponds to\n the raw values predicted from the trees of the ensemble . The\n classes corresponds to that in the attribute :term:`classes_`.\n Regression and binary classification are special cases with\n ``k == 1``, otherwise ``k==n_classes``." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted values." - }, - { - "name": "staged_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Predict class at each stage for X.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted value of the input samples." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nRaises\n------\nAttributeError\n If the ``loss`` does not support probabilities.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class log-probabilities for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nRaises\n------\nAttributeError\n If the ``loss`` does not support probabilities.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class log-probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "staged_predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Predict class probabilities at each stage for X.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted value of the input samples." - } - ], - "docstring": "Gradient Boosting for classification.\n\nGB builds an additive model in a\nforward stage-wise fashion; it allows for the optimization of\narbitrary differentiable loss functions. In each stage ``n_classes_``\nregression trees are fit on the negative gradient of the\nbinomial or multinomial deviance loss function. 
Binary classification\nis a special case where only a single regression tree is induced.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nloss : {'deviance', 'exponential'}, default='deviance'\n The loss function to be optimized. 'deviance' refers to\n deviance (= logistic regression) for classification\n with probabilistic outputs. For loss 'exponential' gradient\n boosting recovers the AdaBoost algorithm.\n\nlearning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by `learning_rate`.\n There is a trade-off between learning_rate and n_estimators.\n\nn_estimators : int, default=100\n The number of boosting stages to perform. Gradient boosting\n is fairly robust to over-fitting so a large number usually\n results in better performance.\n\nsubsample : float, default=1.0\n The fraction of samples to be used for fitting the individual base\n learners. If smaller than 1.0 this results in Stochastic Gradient\n Boosting. `subsample` interacts with the parameter `n_estimators`.\n Choosing `subsample < 1.0` leads to a reduction of variance\n and an increase in bias.\n\ncriterion : {'friedman_mse', 'mse', 'mae'}, default='friedman_mse'\n The function to measure the quality of a split. Supported criteria\n are 'friedman_mse' for the mean squared error with improvement\n score by Friedman, 'mse' for mean squared error, and 'mae' for\n the mean absolute error. The default value of 'friedman_mse' is\n generally the best as it can provide a better approximation in\n some cases.\n\n .. versionadded:: 0.18\n .. deprecated:: 0.24\n `criterion='mae'` is deprecated and will be removed in version\n 1.1 (renaming of 0.26). 
Use `criterion='friedman_mse'` or `'mse'`\n instead, as trees should use a least-square criterion in\n Gradient Boosting.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_depth : int, default=3\n The maximum depth of the individual regression estimators. The maximum\n depth limits the number of nodes in the tree. 
Tune this parameter\n for best performance; the best value depends on the interaction\n of the input variables.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\ninit : estimator or 'zero', default=None\n An estimator object that is used to compute the initial predictions.\n ``init`` has to provide :meth:`fit` and :meth:`predict_proba`. If\n 'zero', the initial raw predictions are set to zero. 
By default, a\n ``DummyEstimator`` predicting the classes priors is used.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random seed given to each Tree estimator at each\n boosting iteration.\n In addition, it controls the random permutation of the features at\n each split (see Notes for more details).\n It also controls the random spliting of the training data to obtain a\n validation set if `n_iter_no_change` is not None.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nmax_features : {'auto', 'sqrt', 'log2'}, int or float, default=None\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If 'auto', then `max_features=sqrt(n_features)`.\n - If 'sqrt', then `max_features=sqrt(n_features)`.\n - If 'log2', then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Choosing `max_features < n_features` leads to a reduction of variance\n and an increase in bias.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nverbose : int, default=0\n Enable verbose output. If 1 then it prints progress and performance\n once in a while (the more trees the lower the frequency). 
If greater\n than 1 then it prints progress and performance for every tree.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just erase the\n previous solution. See :term:`the Glossary `.\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if ``n_iter_no_change`` is set to an integer.\n\n .. versionadded:: 0.20\n\nn_iter_no_change : int, default=None\n ``n_iter_no_change`` is used to decide if early stopping will be used\n to terminate training when validation score is not improving. By\n default it is set to None to disable early stopping. If set to a\n number, it will set aside ``validation_fraction`` size of the training\n data as validation and terminate training when validation score is not\n improving in all of the previous ``n_iter_no_change`` numbers of\n iterations. The split is stratified.\n\n .. versionadded:: 0.20\n\ntol : float, default=1e-4\n Tolerance for the early stopping. When the loss is not improving\n by at least tol for ``n_iter_no_change`` iterations (if set to a\n number), the training stops.\n\n .. versionadded:: 0.20\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nn_estimators_ : int\n The number of estimators as selected by early stopping (if\n ``n_iter_no_change`` is specified). 
Otherwise it is set to\n ``n_estimators``.\n\n .. versionadded:: 0.20\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\noob_improvement_ : ndarray of shape (n_estimators,)\n The improvement in loss (= deviance) on the out-of-bag samples\n relative to the previous iteration.\n ``oob_improvement_[0]`` is the improvement in\n loss of the first stage over the ``init`` estimator.\n Only available if ``subsample < 1.0``\n\ntrain_score_ : ndarray of shape (n_estimators,)\n The i-th score ``train_score_[i]`` is the deviance (= loss) of the\n model at iteration ``i`` on the in-bag sample.\n If ``subsample == 1`` this is the deviance on the training data.\n\nloss_ : LossFunction\n The concrete ``LossFunction`` object.\n\ninit_ : estimator\n The estimator that provides the initial predictions.\n Set via the ``init`` argument or ``loss.init_estimator``.\n\nestimators_ : ndarray of DecisionTreeRegressor of shape (n_estimators, ``loss_.K``)\n The collection of fitted sub-estimators. 
``loss_.K`` is 1 for binary\n classification, otherwise n_classes.\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\nn_features_ : int\n The number of data features.\n\nn_classes_ : int\n The number of classes.\n\nmax_features_ : int\n The inferred value of max_features.\n\nSee Also\n--------\nHistGradientBoostingClassifier : Histogram-based Gradient Boosting\n Classification Tree.\nsklearn.tree.DecisionTreeClassifier : A decision tree classifier.\nRandomForestClassifier : A meta-estimator that fits a number of decision\n tree classifiers on various sub-samples of the dataset and uses\n averaging to improve the predictive accuracy and control over-fitting.\nAdaBoostClassifier : A meta-estimator that begins by fitting a classifier\n on the original dataset and then fits additional copies of the\n classifier on the same dataset where the weights of incorrectly\n classified instances are adjusted such that subsequent classifiers\n focus more on difficult cases.\n\nNotes\n-----\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data and\n``max_features=n_features``, if the improvement of the criterion is\nidentical for several splits enumerated during the search of the best\nsplit. To obtain a deterministic behaviour during fitting,\n``random_state`` has to be fixed.\n\nReferences\n----------\nJ. Friedman, Greedy Function Approximation: A Gradient Boosting\nMachine, The Annals of Statistics, Vol. 29, No. 5, 2001.\n\nJ. Friedman, Stochastic Gradient Boosting, 1999\n\nT. Hastie, R. Tibshirani and J. Friedman.\nElements of Statistical Learning Ed. 
2, Springer, 2009.\n\nExamples\n--------\nThe following example shows how to fit a gradient boosting classifier with\n100 decision stumps as weak learners.\n\n>>> from sklearn.datasets import make_hastie_10_2\n>>> from sklearn.ensemble import GradientBoostingClassifier\n\n>>> X, y = make_hastie_10_2(random_state=0)\n>>> X_train, X_test = X[:2000], X[2000:]\n>>> y_train, y_test = y[:2000], y[2000:]\n\n>>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,\n... max_depth=1, random_state=0).fit(X_train, y_train)\n>>> clf.score(X_test, y_test)\n0.913..." - }, - { - "name": "GradientBoostingRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "loss", - "type": "Literal['ls', 'lad', 'huber', 'quantile']", - "hasDefault": true, - "default": "'ls'", - "limitation": null, - "ignored": false, - "docstring": "Loss function to be optimized. 'ls' refers to least squares regression. 'lad' (least absolute deviation) is a highly robust loss function solely based on order information of the input variables. 'huber' is a combination of the two. 'quantile' allows quantile regression (use `alpha` to specify the quantile)." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Learning rate shrinks the contribution of each tree by `learning_rate`. There is a trade-off between learning_rate and n_estimators." - }, - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance." 
- }, - { - "name": "subsample", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. `subsample` interacts with the parameter `n_estimators`. Choosing `subsample < 1.0` leads to a reduction of variance and an increase in bias." - }, - { - "name": "criterion", - "type": "Literal['friedman_mse', 'mse', 'mae']", - "hasDefault": true, - "default": "'friedman_mse'", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are \"friedman_mse\" for the mean squared error with improvement score by Friedman, \"mse\" for mean squared error, and \"mae\" for the mean absolute error. The default value of \"friedman_mse\" is generally the best as it can provide a better approximation in some cases. .. versionadded:: 0.18 .. deprecated:: 0.24 `criterion='mae'` is deprecated and will be removed in version 1.1 (renaming of 0.26). The correct way of minimizing the absolute error is to use `loss='lad'` instead." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. 
A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Maximum depth of the individual regression estimators. The maximum depth limits the number of nodes in the tree. Tune this parameter for best performance; the best value depends on the interaction of the input variables." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. 
versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "init", - "type": "Literal['zero']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator object that is used to compute the initial predictions. ``init`` has to provide :term:`fit` and :term:`predict`. If 'zero', the initial raw predictions are set to zero. By default a ``DummyEstimator`` is used, predicting either the average target value (for loss='ls'), or a quantile for the other losses." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the random seed given to each Tree estimator at each boosting iteration. In addition, it controls the random permutation of the features at each split (see Notes for more details). It also controls the random spliting of the training data to obtain a validation set if `n_iter_no_change` is not None. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "max_features", - "type": "Literal['auto', 'sqrt', 'log2']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. 
- If float, then `max_features` is a fraction and `int(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=n_features`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Choosing `max_features < n_features` leads to a reduction of variance and an increase in bias. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The alpha-quantile of the huber loss function and the quantile loss function. Only if ``loss='huber'`` or ``loss='quantile'``." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. If 1 then it prints progress and performance once in a while (the more trees the lower the frequency). If greater than 1 then it prints progress and performance for every tree." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just erase the previous solution. See :term:`the Glossary `." 
- }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if ``n_iter_no_change`` is set to an integer. .. versionadded:: 0.20" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "``n_iter_no_change`` is used to decide if early stopping will be used to terminate training when validation score is not improving. By default it is set to None to disable early stopping. If set to a number, it will set aside ``validation_fraction`` size of the training data as validation and terminate training when validation score is not improving in all of the previous ``n_iter_no_change`` numbers of iterations. .. versionadded:: 0.20" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for the early stopping. When the loss is not improving by at least tol for ``n_iter_no_change`` iterations (if set to a number), the training stops. .. versionadded:: 0.20" - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_warn_mae_for_criterion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict regression target for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted values." - }, - { - "name": "staged_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Predict regression target at each stage for X.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. 
Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted value of the input samples." - }, - { - "name": "apply", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, its dtype will be converted to ``dtype=np.float32``. If a sparse matrix is provided, it will be converted to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply trees in the ensemble to X, return leaf indices.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will\n be converted to a sparse ``csr_matrix``.\n\nReturns\n-------\nX_leaves : array-like of shape (n_samples, n_estimators)\n For each datapoint x in X and for each tree in the ensemble,\n return the index of the leaf x ends up in each estimator." - }, - { - "name": "n_classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Gradient Boosting for regression.\n\nGB builds an additive model in a forward stage-wise fashion;\nit allows for the optimization of arbitrary differentiable loss functions.\nIn each stage a regression tree is fit on the negative gradient of the\ngiven loss function.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nloss : {'ls', 'lad', 'huber', 'quantile'}, default='ls'\n Loss function to be optimized. 'ls' refers to least squares\n regression. 'lad' (least absolute deviation) is a highly robust\n loss function solely based on order information of the input\n variables. 
'huber' is a combination of the two. 'quantile'\n allows quantile regression (use `alpha` to specify the quantile).\n\nlearning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by `learning_rate`.\n There is a trade-off between learning_rate and n_estimators.\n\nn_estimators : int, default=100\n The number of boosting stages to perform. Gradient boosting\n is fairly robust to over-fitting so a large number usually\n results in better performance.\n\nsubsample : float, default=1.0\n The fraction of samples to be used for fitting the individual base\n learners. If smaller than 1.0 this results in Stochastic Gradient\n Boosting. `subsample` interacts with the parameter `n_estimators`.\n Choosing `subsample < 1.0` leads to a reduction of variance\n and an increase in bias.\n\ncriterion : {'friedman_mse', 'mse', 'mae'}, default='friedman_mse'\n The function to measure the quality of a split. Supported criteria\n are \"friedman_mse\" for the mean squared error with improvement\n score by Friedman, \"mse\" for mean squared error, and \"mae\" for\n the mean absolute error. The default value of \"friedman_mse\" is\n generally the best as it can provide a better approximation in\n some cases.\n\n .. versionadded:: 0.18\n .. deprecated:: 0.24\n `criterion='mae'` is deprecated and will be removed in version\n 1.1 (renaming of 0.26). The correct way of minimizing the absolute\n error is to use `loss='lad'` instead.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. 
versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_depth : int, default=3\n Maximum depth of the individual regression estimators. The maximum\n depth limits the number of nodes in the tree. Tune this parameter\n for best performance; the best value depends on the interaction\n of the input variables.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. 
A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\ninit : estimator or 'zero', default=None\n An estimator object that is used to compute the initial predictions.\n ``init`` has to provide :term:`fit` and :term:`predict`. If 'zero', the\n initial raw predictions are set to zero. By default a\n ``DummyEstimator`` is used, predicting either the average target value\n (for loss='ls'), or a quantile for the other losses.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random seed given to each Tree estimator at each\n boosting iteration.\n In addition, it controls the random permutation of the features at\n each split (see Notes for more details).\n It also controls the random spliting of the training data to obtain a\n validation set if `n_iter_no_change` is not None.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nmax_features : {'auto', 'sqrt', 'log2'}, int or float, default=None\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Choosing `max_features < n_features` leads to a reduction of variance\n and an increase in bias.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it 
requires to\n effectively inspect more than ``max_features`` features.\n\nalpha : float, default=0.9\n The alpha-quantile of the huber loss function and the quantile\n loss function. Only if ``loss='huber'`` or ``loss='quantile'``.\n\nverbose : int, default=0\n Enable verbose output. If 1 then it prints progress and performance\n once in a while (the more trees the lower the frequency). If greater\n than 1 then it prints progress and performance for every tree.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just erase the\n previous solution. See :term:`the Glossary `.\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if ``n_iter_no_change`` is set to an integer.\n\n .. versionadded:: 0.20\n\nn_iter_no_change : int, default=None\n ``n_iter_no_change`` is used to decide if early stopping will be used\n to terminate training when validation score is not improving. By\n default it is set to None to disable early stopping. If set to a\n number, it will set aside ``validation_fraction`` size of the training\n data as validation and terminate training when validation score is not\n improving in all of the previous ``n_iter_no_change`` numbers of\n iterations.\n\n .. versionadded:: 0.20\n\ntol : float, default=1e-4\n Tolerance for the early stopping. When the loss is not improving\n by at least tol for ``n_iter_no_change`` iterations (if set to a\n number), the training stops.\n\n .. versionadded:: 0.20\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. 
The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\noob_improvement_ : ndarray of shape (n_estimators,)\n The improvement in loss (= deviance) on the out-of-bag samples\n relative to the previous iteration.\n ``oob_improvement_[0]`` is the improvement in\n loss of the first stage over the ``init`` estimator.\n Only available if ``subsample < 1.0``\n\ntrain_score_ : ndarray of shape (n_estimators,)\n The i-th score ``train_score_[i]`` is the deviance (= loss) of the\n model at iteration ``i`` on the in-bag sample.\n If ``subsample == 1`` this is the deviance on the training data.\n\nloss_ : LossFunction\n The concrete ``LossFunction`` object.\n\ninit_ : estimator\n The estimator that provides the initial predictions.\n Set via the ``init`` argument or ``loss.init_estimator``.\n\nestimators_ : ndarray of DecisionTreeRegressor of shape (n_estimators, 1)\n The collection of fitted sub-estimators.\n\nn_classes_ : int\n The number of classes, set to 1 for regressors.\n\n .. deprecated:: 0.24\n Attribute ``n_classes_`` was deprecated in version 0.24 and\n will be removed in 1.1 (renaming of 0.26).\n\nn_estimators_ : int\n The number of estimators as selected by early stopping (if\n ``n_iter_no_change`` is specified). 
Otherwise it is set to\n ``n_estimators``.\n\nn_features_ : int\n The number of data features.\n\nmax_features_ : int\n The inferred value of max_features.\n\nSee Also\n--------\nHistGradientBoostingRegressor : Histogram-based Gradient Boosting\n Classification Tree.\nsklearn.tree.DecisionTreeRegressor : A decision tree regressor.\nsklearn.tree.RandomForestRegressor : A random forest regressor.\n\nNotes\n-----\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data and\n``max_features=n_features``, if the improvement of the criterion is\nidentical for several splits enumerated during the search of the best\nsplit. To obtain a deterministic behaviour during fitting,\n``random_state`` has to be fixed.\n\nExamples\n--------\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.ensemble import GradientBoostingRegressor\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_regression(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, random_state=0)\n>>> reg = GradientBoostingRegressor(random_state=0)\n>>> reg.fit(X_train, y_train)\nGradientBoostingRegressor(random_state=0)\n>>> reg.predict(X_test[1:2])\narray([-61...])\n>>> reg.score(X_test, y_test)\n0.4...\n\nReferences\n----------\nJ. Friedman, Greedy Function Approximation: A Gradient Boosting\nMachine, The Annals of Statistics, Vol. 29, No. 5, 2001.\n\nJ. Friedman, Stochastic Gradient Boosting, 1999\n\nT. Hastie, R. Tibshirani and J. Friedman.\nElements of Statistical Learning Ed. 2, Springer, 2009." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.ensemble._gb_losses", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numpy as np", - "from scipy.special import expit", - "from scipy.special import logsumexp", - "from tree._tree import TREE_LEAF", - "from utils.stats import _weighted_percentile", - "from dummy import DummyClassifier", - "from dummy import DummyRegressor" - ], - "classes": [ - { - "name": "LossFunction", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_classes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of classes." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "init_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Default ``init`` estimator for loss function. " - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves)." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the loss.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves).\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." - }, - { - "name": "negative_gradient", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the negative gradient.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``." - }, - { - "name": "update_terminal_regions", - "decorators": [], - "parameters": [ - { - "name": "tree", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The tree object." - }, - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data array." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target labels." - }, - { - "name": "residual", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The residuals (usually the negative gradient)." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." 
- }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weight of each sample." - }, - { - "name": "sample_mask", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sample mask to be used." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Learning rate shrinks the contribution of each tree by ``learning_rate``." - }, - { - "name": "k", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The index of the estimator being updated." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update the terminal regions (=leaves) of the given tree and\nupdates the current predictions of the model. Traverses tree\nand invokes template method `_update_terminal_region`.\n\nParameters\n----------\ntree : tree.Tree\n The tree object.\nX : ndarray of shape (n_samples, n_features)\n The data array.\ny : ndarray of shape (n_samples,)\n The target labels.\nresidual : ndarray of shape (n_samples,)\n The residuals (usually the negative gradient).\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\nsample_weight : ndarray of shape (n_samples,)\n The weight of each sample.\nsample_mask : ndarray of shape (n_samples,)\n The sample mask to be used.\nlearning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by\n ``learning_rate``.\nk : int, default=0\n The index of the estimator being updated." 
- }, - { - "name": "_update_terminal_region", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Template method for updating terminal regions (i.e., leaves)." - }, - { - "name": "get_init_raw_predictions", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data array." - }, - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator to use to compute the predictions." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the initial raw predictions.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The data array.\nestimator : object\n The estimator to use to compute the predictions.\n\nReturns\n-------\nraw_predictions : ndarray of shape (n_samples, K)\n The initial raw predictions. K is equal to 1 for binary\n classification and regression, and equal to the number of classes\n for multiclass classification. ``raw_predictions`` is casted\n into float64." - } - ], - "docstring": "Abstract base class for various loss functions.\n\nParameters\n----------\nn_classes : int\n Number of classes.\n\nAttributes\n----------\nK : int\n The number of regression trees to be induced;\n 1 for regression and binary classification;\n ``n_classes`` for multi-class classification." 
- }, - { - "name": "RegressionLossFunction", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_init_estimator", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The init estimator to check." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure estimator has the required fit and predict methods.\n\nParameters\n----------\nestimator : object\n The init estimator to check." - }, - { - "name": "get_init_raw_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for regression loss functions." - }, - { - "name": "LeastSquaresError", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_classes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of classes." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "init_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves)." 
- }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the least squares loss.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves).\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." - }, - { - "name": "negative_gradient", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute half of the negative gradient.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples,)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``." - }, - { - "name": "update_terminal_regions", - "decorators": [], - "parameters": [ - { - "name": "tree", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The tree object." - }, - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data array." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target labels." 
- }, - { - "name": "residual", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The residuals (usually the negative gradient)." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weight of each sample." - }, - { - "name": "sample_mask", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sample mask to be used." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Learning rate shrinks the contribution of each tree by ``learning_rate``." - }, - { - "name": "k", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The index of the estimator being updated." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Least squares does not need to update terminal regions.\n\nBut it has to update the predictions.\n\nParameters\n----------\ntree : tree.Tree\n The tree object.\nX : ndarray of shape (n_samples, n_features)\n The data array.\ny : ndarray of shape (n_samples,)\n The target labels.\nresidual : ndarray of shape (n_samples,)\n The residuals (usually the negative gradient).\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\nsample_weight : ndarray of shape (n,)\n The weight of each sample.\nsample_mask : ndarray of shape (n,)\n The sample mask to be used.\nlearning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by\n ``learning_rate``.\nk : int, default=0\n The index of the estimator being updated." - }, - { - "name": "_update_terminal_region", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Loss function for least squares (LS) estimation.\nTerminal regions do not need to be updated for least squares.\n\nParameters\n----------\nn_classes : int\n Number of classes." - }, - { - "name": "LeastAbsoluteError", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_classes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of classes" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "init_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves)." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the least absolute error.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves).\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." - }, - { - "name": "negative_gradient", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the negative gradient.\n\n1.0 if y - raw_predictions > 0.0 else -1.0\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``." - }, - { - "name": "_update_terminal_region", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "LAD updates terminal regions to median estimates." - } - ], - "docstring": "Loss function for least absolute deviation (LAD) regression.\n\nParameters\n----------\nn_classes : int\n Number of classes" - }, - { - "name": "HuberLossFunction", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Percentile at which to extract score." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "init_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Huber loss.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." - }, - { - "name": "negative_gradient", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the negative gradient.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." - }, - { - "name": "_update_terminal_region", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Huber loss function for robust regression.\n\nM-Regression proposed in Friedman 2001.\n\nParameters\n----------\nalpha : float, default=0.9\n Percentile at which to extract score.\n\nReferences\n----------\nJ. Friedman, Greedy Function Approximation: A Gradient Boosting\nMachine, The Annals of Statistics, Vol. 29, No. 5, 2001." - }, - { - "name": "QuantileLossFunction", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The percentile." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "init_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble." 
- }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Quantile loss.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." - }, - { - "name": "negative_gradient", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the negative gradient.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``." - }, - { - "name": "_update_terminal_region", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Loss function for quantile regression.\n\nQuantile regression allows to estimate the percentiles\nof the conditional distribution of the target.\n\nParameters\n----------\nalpha : float, default=0.9\n The percentile." 
- }, - { - "name": "ClassificationLossFunction", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_raw_prediction_to_proba", - "decorators": [], - "parameters": [ - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Template method to convert raw predictions into probabilities.\n\nParameters\n----------\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nReturns\n-------\nprobas : ndarray of shape (n_samples, K)\n The predicted probabilities." - }, - { - "name": "_raw_prediction_to_decision", - "decorators": [], - "parameters": [ - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Template method to convert raw predictions to decisions.\n\nParameters\n----------\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nReturns\n-------\nencoded_predictions : ndarray of shape (n_samples, K)\n The predicted encoded labels." - }, - { - "name": "check_init_estimator", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The init estimator to check." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure estimator has fit and predict_proba methods.\n\nParameters\n----------\nestimator : object\n The init estimator to check." - } - ], - "docstring": "Base class for classification loss functions. " - }, - { - "name": "BinomialDeviance", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_classes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of classes." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "init_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the deviance (= 2 * negative log-likelihood).\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." 
- }, - { - "name": "negative_gradient", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute half of the negative gradient.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``." - }, - { - "name": "_update_terminal_region", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make a single Newton-Raphson step.\n\nour node estimate is given by:\n\n sum(w * (y - prob)) / sum(w * prob * (1 - prob))\n\nwe take advantage that: y - prob = residual" - }, - { - "name": "_raw_prediction_to_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_raw_prediction_to_decision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_init_raw_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Binomial deviance loss function for binary classification.\n\nBinary classification is a special case; here, we only need to\nfit one tree instead of ``n_classes`` trees.\n\nParameters\n----------\nn_classes : int\n Number of classes." 
- }, - { - "name": "MultinomialDeviance", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_classes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of classes." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "init_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Multinomial deviance.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." - }, - { - "name": "negative_gradient", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target labels." 
- }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - }, - { - "name": "k", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The index of the class." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute negative gradient for the ``k``-th class.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n\nk : int, default=0\n The index of the class." - }, - { - "name": "_update_terminal_region", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make a single Newton-Raphson step. " - }, - { - "name": "_raw_prediction_to_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_raw_prediction_to_decision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_init_raw_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Multinomial deviance loss function for multi-class classification.\n\nFor multi-class classification we need to fit ``n_classes`` trees at\neach stage.\n\nParameters\n----------\nn_classes : int\n Number of classes." 
- }, - { - "name": "ExponentialLoss", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_classes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of classes." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "init_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the exponential loss\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." - }, - { - "name": "negative_gradient", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." 
- }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the residual (= negative gradient).\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``." - }, - { - "name": "_update_terminal_region", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_raw_prediction_to_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_raw_prediction_to_decision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_init_raw_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Exponential loss function for binary classification.\n\nSame loss as AdaBoost.\n\nParameters\n----------\nn_classes : int\n Number of classes.\n\nReferences\n----------\nGreg Ridgeway, Generalized Boosted Models: A guide to the gbm package, 2007" - } - ], - "functions": [] - }, - { - "name": "sklearn.ensemble._iforest", - "imports": [ - "import numbers", - "import numpy as np", - "from scipy.sparse import issparse", - "from warnings import warn", - "from tree import ExtraTreeRegressor", - "from utils import check_random_state", - "from utils import check_array", - "from utils import gen_batches", - "from utils import get_chunk_n_rows", - "from utils.fixes import _joblib_parallel_args", - "from utils.validation import 
check_is_fitted", - "from utils.validation import _num_samples", - "from utils.validation import _deprecate_positional_args", - "from base import OutlierMixin", - "from _bagging import BaseBagging" - ], - "classes": [ - { - "name": "IsolationForest", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of base estimators in the ensemble." - }, - { - "name": "max_samples", - "type": "Union[float, int, Literal[\"auto\"]]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "The number of samples to draw from X to train each base estimator. - If int, then draw `max_samples` samples. - If float, then draw `max_samples * X.shape[0]` samples. - If \"auto\", then `max_samples=min(256, n_samples)`. If max_samples is larger than the number of samples provided, all samples will be used for all trees (no sampling)." - }, - { - "name": "contamination", - "type": "Union[Literal['auto'], float]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The amount of contamination of the data set, i.e. the proportion of outliers in the data set. Used when fitting to define the threshold on the scores of the samples. - If 'auto', the threshold is determined as in the original paper. - If float, the contamination should be in the range [0, 0.5]. .. versionchanged:: 0.22 The default value of ``contamination`` changed from 0.1 to ``'auto'``." - }, - { - "name": "max_features", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of features to draw from X to train each base estimator. - If int, then draw `max_features` features. 
- If float, then draw `max_features * X.shape[1]` features." - }, - { - "name": "bootstrap", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, individual trees are fit on random subsets of the training data sampled with replacement. If False, sampling without replacement is performed." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel for both :meth:`fit` and :meth:`predict`. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo-randomness of the selection of the feature and split values for each branching step and each tree in the forest. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity of the tree building process." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`the Glossary `. .. 
versionadded:: 0.21" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_parallel_args", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Use ``dtype=np.float32`` for maximum efficiency. Sparse matrices are also supported, use sparse ``csc_matrix`` for maximum efficiency." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Use ``dtype=np.float32`` for maximum\n efficiency. Sparse matrices are also supported, use sparse\n ``csc_matrix`` for maximum efficiency.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\nself : object\n Fitted estimator." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. 
Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict if a particular sample is an outlier or not.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n For each observation, tells whether or not (+1 or -1) it should\n be considered as an inlier according to the fitted model." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Average anomaly score of X of the base classifiers.\n\nThe anomaly score of an input sample is computed as\nthe mean anomaly score of the trees in the forest.\n\nThe measure of normality of an observation given a tree is the depth\nof the leaf containing this observation, which is equivalent to\nthe number of splittings required to isolate this point. In case of\nseveral observations n_left in the leaf, the average path length of\na n_left samples isolation tree is added.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nscores : ndarray of shape (n_samples,)\n The anomaly score of the input samples.\n The lower, the more abnormal. 
Negative scores represent outliers,\n positive scores represent inliers." - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Opposite of the anomaly score defined in the original paper.\n\nThe anomaly score of an input sample is computed as\nthe mean anomaly score of the trees in the forest.\n\nThe measure of normality of an observation given a tree is the depth\nof the leaf containing this observation, which is equivalent to\nthe number of splittings required to isolate this point. In case of\nseveral observations n_left in the leaf, the average path length of\na n_left samples isolation tree is added.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\nscores : ndarray of shape (n_samples,)\n The anomaly score of the input samples.\n The lower, the more abnormal." - }, - { - "name": "_compute_chunked_score_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_compute_score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix." - }, - { - "name": "subsample_features", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether features should be subsampled." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the score of each samples in X going through the extra trees.\n\nParameters\n----------\nX : array-like or sparse matrix\n Data matrix.\n\nsubsample_features : bool\n Whether features should be subsampled." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Isolation Forest Algorithm.\n\nReturn the anomaly score of each sample using the IsolationForest algorithm\n\nThe IsolationForest 'isolates' observations by randomly selecting a feature\nand then randomly selecting a split value between the maximum and minimum\nvalues of the selected feature.\n\nSince recursive partitioning can be represented by a tree structure, the\nnumber of splittings required to isolate a sample is equivalent to the path\nlength from the root node to the terminating node.\n\nThis path length, averaged over a forest of such random trees, is a\nmeasure of normality and our decision function.\n\nRandom partitioning produces noticeably shorter paths for anomalies.\nHence, when a forest of random trees collectively produce shorter path\nlengths for particular samples, they are highly likely to be anomalies.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nn_estimators : int, default=100\n The number of base estimators in the ensemble.\n\nmax_samples : \"auto\", int or float, default=\"auto\"\n The number of samples to draw from X to train each base estimator.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples.\n - If \"auto\", then `max_samples=min(256, n_samples)`.\n\n If max_samples is larger than the number of samples provided,\n all samples will be used for all trees (no sampling).\n\ncontamination : 'auto' or float, default='auto'\n The amount of contamination of the data set, i.e. 
the proportion\n of outliers in the data set. Used when fitting to define the threshold\n on the scores of the samples.\n\n - If 'auto', the threshold is determined as in the\n original paper.\n - If float, the contamination should be in the range [0, 0.5].\n\n .. versionchanged:: 0.22\n The default value of ``contamination`` changed from 0.1\n to ``'auto'``.\n\nmax_features : int or float, default=1.0\n The number of features to draw from X to train each base estimator.\n\n - If int, then draw `max_features` features.\n - If float, then draw `max_features * X.shape[1]` features.\n\nbootstrap : bool, default=False\n If True, individual trees are fit on random subsets of the training\n data sampled with replacement. If False, sampling without replacement\n is performed.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for both :meth:`fit` and\n :meth:`predict`. ``None`` means 1 unless in a\n :obj:`joblib.parallel_backend` context. ``-1`` means using all\n processors. See :term:`Glossary ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo-randomness of the selection of the feature\n and split values for each branching step and each tree in the forest.\n\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nverbose : int, default=0\n Controls the verbosity of the tree building process.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. See :term:`the Glossary `.\n\n .. 
versionadded:: 0.21\n\nAttributes\n----------\nbase_estimator_ : ExtraTreeRegressor instance\n The child estimator template used to create the collection of\n fitted sub-estimators.\n\nestimators_ : list of ExtraTreeRegressor instances\n The collection of fitted sub-estimators.\n\nestimators_features_ : list of ndarray\n The subset of drawn features for each base estimator.\n\nestimators_samples_ : list of ndarray\n The subset of drawn samples (i.e., the in-bag samples) for each base\n estimator.\n\nmax_samples_ : int\n The actual number of samples.\n\noffset_ : float\n Offset used to define the decision function from the raw scores. We\n have the relation: ``decision_function = score_samples - offset_``.\n ``offset_`` is defined as follows. When the contamination parameter is\n set to \"auto\", the offset is equal to -0.5 as the scores of inliers are\n close to 0 and the scores of outliers are close to -1. When a\n contamination parameter different than \"auto\" is provided, the offset\n is defined in such a way we obtain the expected number of outliers\n (samples with decision function < 0) in training.\n\n .. versionadded:: 0.20\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nNotes\n-----\nThe implementation is based on an ensemble of ExtraTreeRegressor. The\nmaximum depth of each tree is set to ``ceil(log_2(n))`` where\n:math:`n` is the number of samples used to build the tree\n(see (Liu et al., 2008) for more details).\n\nReferences\n----------\n.. [1] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. \"Isolation forest.\"\n Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on.\n.. [2] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. 
\"Isolation-based\n anomaly detection.\" ACM Transactions on Knowledge Discovery from\n Data (TKDD) 6.1 (2012): 3.\n\nSee Also\n----------\nsklearn.covariance.EllipticEnvelope : An object for detecting outliers in a\n Gaussian distributed dataset.\nsklearn.svm.OneClassSVM : Unsupervised Outlier Detection.\n Estimate the support of a high-dimensional distribution.\n The implementation is based on libsvm.\nsklearn.neighbors.LocalOutlierFactor : Unsupervised Outlier Detection\n using Local Outlier Factor (LOF).\n\nExamples\n--------\n>>> from sklearn.ensemble import IsolationForest\n>>> X = [[-1.1], [0.3], [0.5], [100]]\n>>> clf = IsolationForest(random_state=0).fit(X)\n>>> clf.predict([[0.1], [0], [90]])\narray([ 1, 1, -1])" - } - ], - "functions": [ - { - "name": "_average_path_length", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The average path length in a n_samples iTree, which is equal to\nthe average path length of an unsuccessful BST search since the\nlatter has the same structure as an isolation tree.\nParameters\n----------\nn_samples_leaf : array-like of shape (n_samples,)\n The number of training samples in each test sample leaf, for\n each estimators.\n\nReturns\n-------\naverage_path_length : ndarray of shape (n_samples,)" - } - ] - }, - { - "name": "sklearn.ensemble._stacking", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "from copy import deepcopy", - "import numpy as np", - "from joblib import Parallel", - "import scipy.sparse as sparse", - "from base import clone", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import TransformerMixin", - "from base import is_classifier", - "from base import is_regressor", - "from exceptions import NotFittedError", - "from utils._estimator_html_repr import _VisualBlock", - "from _base import _fit_single_estimator", - "from _base import _BaseHeterogeneousEnsemble", - "from 
linear_model import LogisticRegression", - "from linear_model import RidgeCV", - "from model_selection import cross_val_predict", - "from model_selection import check_cv", - "from preprocessing import LabelEncoder", - "from utils import Bunch", - "from utils.metaestimators import if_delegate_has_method", - "from utils.multiclass import check_classification_targets", - "from utils.validation import check_is_fitted", - "from utils.validation import column_or_1d", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed" - ], - "classes": [ - { - "name": "_BaseStacking", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_clone_final_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_concatenate_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Concatenate the predictions of each first layer learner and\npossibly the input dataset `X`.\n\nIf `X` is sparse and `self.passthrough` is False, the output of\n`transform` will be dense (the predictions). If `X` is sparse\nand `self.passthrough` is True, the output of `transform` will\nbe sparse.\n\nThis helper is in charge of ensuring the predictions are 2D arrays and\nit will drop one of the probability column when using probabilities\nin the binary case. 
Indeed, the p(y|c=0) = 1 - p(y|c=1)" - }, - { - "name": "_method_name", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where `n_samples` is the number of samples and `n_features` is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Note that this is supported only if all underlying estimators support sample weights. .. versionchanged:: 0.23 when not None, `sample_weight` is passed to all underlying estimators" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,) or default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\n .. versionchanged:: 0.23\n when not None, `sample_weight` is passed to all underlying\n estimators\n\nReturns\n-------\nself : object" - }, - { - "name": "n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Number of features seen during :term:`fit`." 
- }, - { - "name": "_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Concatenate and return the predictions of the estimators." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "**predict_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to the `predict` called by the `final_estimator`. Note that this may be used to return uncertainties from some estimators with `return_std` or `return_cov`. Be aware that it will only accounts for uncertainty in the final estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict target for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n**predict_params : dict of str -> obj\n Parameters to the `predict` called by the `final_estimator`. Note\n that this may be used to return uncertainties from some estimators\n with `return_std` or `return_cov`. Be aware that it will only\n accounts for uncertainty in the final estimator.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,) or (n_samples, n_output)\n Predicted targets." - }, - { - "name": "_sk_visual_block_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for stacking method." 
- }, - { - "name": "StackingClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimators", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Base estimators which will be stacked together. Each element of the list is defined as a tuple of string (i.e. name) and an estimator instance. An estimator can be set to 'drop' using `set_params`." - }, - { - "name": "final_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A classifier which will be used to combine the base estimators. The default classifier is a :class:`~sklearn.linear_model.LogisticRegression`." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy used in `cross_val_predict` to train `final_estimator`. Possible inputs for cv are: * None, to use the default 5-fold cross validation, * integer, to specify the number of folds in a (Stratified) KFold, * An object to be used as a cross-validation generator, * An iterable yielding train, test splits. For integer/None inputs, if the estimator is a classifier and y is either binary or multiclass, :class:`~sklearn.model_selection.StratifiedKFold` is used. In all other cases, :class:`~sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. note:: A larger number of split will provide no benefits if the number of training samples is large enough. Indeed, the training time will increase. ``cv`` is not used for model evaluation but for prediction." 
- }, - { - "name": "stack_method", - "type": "Literal['auto', 'predict_proba', 'decision_function', 'predict']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Methods called for each base estimator. It can be: * if 'auto', it will try to invoke, for each estimator, `'predict_proba'`, `'decision_function'` or `'predict'` in that order. * otherwise, one of `'predict_proba'`, `'decision_function'` or `'predict'`. If the method is not implemented by the estimator, it will raise an error." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel all `estimators` `fit`. `None` means 1 unless in a `joblib.parallel_backend` context. -1 means using all processors. See Glossary for more details." - }, - { - "name": "passthrough", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When False, only the predictions of estimators will be used as training data for `final_estimator`. When True, the `final_estimator` is trained on the predictions as well as the original training data." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_final_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where `n_samples` is the number of samples and `n_features` is the number of features." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Note that this is supported only if all underlying estimators support sample weights." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\nReturns\n-------\nself : object" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "**predict_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to the `predict` called by the `final_estimator`. Note that this may be used to return uncertainties from some estimators with `return_std` or `return_cov`. Be aware that it will only accounts for uncertainty in the final estimator." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict target for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n**predict_params : dict of str -> obj\n Parameters to the `predict` called by the `final_estimator`. Note\n that this may be used to return uncertainties from some estimators\n with `return_std` or `return_cov`. Be aware that it will only\n accounts for uncertainty in the final estimator.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,) or (n_samples, n_output)\n Predicted targets." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X using\n`final_estimator_.predict_proba`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\nReturns\n-------\nprobabilities : ndarray of shape (n_samples, n_classes) or list of ndarray of shape (n_output,)\n The class probabilities of the input samples." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict decision function for samples in X using\n`final_estimator_.decision_function`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\nReturns\n-------\ndecisions : ndarray of shape (n_samples,), (n_samples, n_classes), or (n_samples, n_classes * (n_classes-1) / 2)\n The decision function computed the final estimator." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where `n_samples` is the number of samples and `n_features` is the number of features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return class labels or probabilities for X for each estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\nReturns\n-------\ny_preds : ndarray of shape (n_samples, n_estimators) or (n_samples, n_classes * n_estimators)\n Prediction outputs for each estimator." - }, - { - "name": "_sk_visual_block_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Stack of estimators with a final classifier.\n\nStacked generalization consists in stacking the output of individual\nestimator and use a classifier to compute the final prediction. 
Stacking\nallows to use the strength of each individual estimator by using their\noutput as input of a final estimator.\n\nNote that `estimators_` are fitted on the full `X` while `final_estimator_`\nis trained using cross-validated predictions of the base estimators using\n`cross_val_predict`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nestimators : list of (str, estimator)\n Base estimators which will be stacked together. Each element of the\n list is defined as a tuple of string (i.e. name) and an estimator\n instance. An estimator can be set to 'drop' using `set_params`.\n\nfinal_estimator : estimator, default=None\n A classifier which will be used to combine the base estimators.\n The default classifier is a\n :class:`~sklearn.linear_model.LogisticRegression`.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy used in\n `cross_val_predict` to train `final_estimator`. Possible inputs for\n cv are:\n\n * None, to use the default 5-fold cross validation,\n * integer, to specify the number of folds in a (Stratified) KFold,\n * An object to be used as a cross-validation generator,\n * An iterable yielding train, test splits.\n\n For integer/None inputs, if the estimator is a classifier and y is\n either binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used.\n In all other cases, :class:`~sklearn.model_selection.KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. note::\n A larger number of split will provide no benefits if the number\n of training samples is large enough. Indeed, the training time\n will increase. ``cv`` is not used for model evaluation but for\n prediction.\n\nstack_method : {'auto', 'predict_proba', 'decision_function', 'predict'}, default='auto'\n Methods called for each base estimator. 
It can be:\n\n * if 'auto', it will try to invoke, for each estimator,\n `'predict_proba'`, `'decision_function'` or `'predict'` in that\n order.\n * otherwise, one of `'predict_proba'`, `'decision_function'` or\n `'predict'`. If the method is not implemented by the estimator, it\n will raise an error.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel all `estimators` `fit`.\n `None` means 1 unless in a `joblib.parallel_backend` context. -1 means\n using all processors. See Glossary for more details.\n\npassthrough : bool, default=False\n When False, only the predictions of estimators will be used as\n training data for `final_estimator`. When True, the\n `final_estimator` is trained on the predictions as well as the\n original training data.\n\nverbose : int, default=0\n Verbosity level.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n Class labels.\n\nestimators_ : list of estimators\n The elements of the estimators parameter, having been fitted on the\n training data. If an estimator has been set to `'drop'`, it\n will not appear in `estimators_`.\n\nnamed_estimators_ : :class:`~sklearn.utils.Bunch`\n Attribute to access any fitted sub-estimators by name.\n\nfinal_estimator_ : estimator\n The classifier which predicts given the output of `estimators_`.\n\nstack_method_ : list of str\n The method used by each base estimator.\n\nNotes\n-----\nWhen `predict_proba` is used by each estimator (i.e. most of the time for\n`stack_method='auto'` or specifically for `stack_method='predict_proba'`),\nThe first column predicted by each estimator will be dropped in the case\nof a binary classification problem. Indeed, both feature will be perfectly\ncollinear.\n\nReferences\n----------\n.. [1] Wolpert, David H. 
\"Stacked generalization.\" Neural networks 5.2\n (1992): 241-259.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.svm import LinearSVC\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.ensemble import StackingClassifier\n>>> X, y = load_iris(return_X_y=True)\n>>> estimators = [\n... ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),\n... ('svr', make_pipeline(StandardScaler(),\n... LinearSVC(random_state=42)))\n... ]\n>>> clf = StackingClassifier(\n... estimators=estimators, final_estimator=LogisticRegression()\n... )\n>>> from sklearn.model_selection import train_test_split\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, stratify=y, random_state=42\n... )\n>>> clf.fit(X_train, y_train).score(X_test, y_test)\n0.9..." - }, - { - "name": "StackingRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimators", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Base estimators which will be stacked together. Each element of the list is defined as a tuple of string (i.e. name) and an estimator instance. An estimator can be set to 'drop' using `set_params`." - }, - { - "name": "final_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A regressor which will be used to combine the base estimators. The default regressor is a :class:`~sklearn.linear_model.RidgeCV`." 
- }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy used in `cross_val_predict` to train `final_estimator`. Possible inputs for cv are: * None, to use the default 5-fold cross validation, * integer, to specify the number of folds in a (Stratified) KFold, * An object to be used as a cross-validation generator, * An iterable yielding train, test splits. For integer/None inputs, if the estimator is a classifier and y is either binary or multiclass, :class:`~sklearn.model_selection.StratifiedKFold` is used. In all other cases, :class:`~sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. note:: A larger number of split will provide no benefits if the number of training samples is large enough. Indeed, the training time will increase. ``cv`` is not used for model evaluation but for prediction." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel for `fit` of all `estimators`. `None` means 1 unless in a `joblib.parallel_backend` context. -1 means using all processors. See Glossary for more details." - }, - { - "name": "passthrough", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When False, only the predictions of estimators will be used as training data for `final_estimator`. When True, the `final_estimator` is trained on the predictions as well as the original training data." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_final_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Note that this is supported only if all underlying estimators support sample weights." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\nReturns\n-------\nself : object" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where `n_samples` is the number of samples and `n_features` is the number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the predictions for X for each estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\nReturns\n-------\ny_preds : ndarray of shape (n_samples, n_estimators)\n Prediction outputs for each estimator." - }, - { - "name": "_sk_visual_block_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Stack of estimators with a final regressor.\n\nStacked generalization consists in stacking the output of individual\nestimator and use a regressor to compute the final prediction. Stacking\nallows to use the strength of each individual estimator by using their\noutput as input of a final estimator.\n\nNote that `estimators_` are fitted on the full `X` while `final_estimator_`\nis trained using cross-validated predictions of the base estimators using\n`cross_val_predict`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nestimators : list of (str, estimator)\n Base estimators which will be stacked together. Each element of the\n list is defined as a tuple of string (i.e. name) and an estimator\n instance. An estimator can be set to 'drop' using `set_params`.\n\nfinal_estimator : estimator, default=None\n A regressor which will be used to combine the base estimators.\n The default regressor is a :class:`~sklearn.linear_model.RidgeCV`.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy used in\n `cross_val_predict` to train `final_estimator`. 
Possible inputs for\n cv are:\n\n * None, to use the default 5-fold cross validation,\n * integer, to specify the number of folds in a (Stratified) KFold,\n * An object to be used as a cross-validation generator,\n * An iterable yielding train, test splits.\n\n For integer/None inputs, if the estimator is a classifier and y is\n either binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used.\n In all other cases, :class:`~sklearn.model_selection.KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. note::\n A larger number of split will provide no benefits if the number\n of training samples is large enough. Indeed, the training time\n will increase. ``cv`` is not used for model evaluation but for\n prediction.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for `fit` of all `estimators`.\n `None` means 1 unless in a `joblib.parallel_backend` context. -1 means\n using all processors. See Glossary for more details.\n\npassthrough : bool, default=False\n When False, only the predictions of estimators will be used as\n training data for `final_estimator`. When True, the\n `final_estimator` is trained on the predictions as well as the\n original training data.\n\nverbose : int, default=0\n Verbosity level.\n\nAttributes\n----------\nestimators_ : list of estimator\n The elements of the estimators parameter, having been fitted on the\n training data. If an estimator has been set to `'drop'`, it\n will not appear in `estimators_`.\n\nnamed_estimators_ : :class:`~sklearn.utils.Bunch`\n Attribute to access any fitted sub-estimators by name.\n\n\nfinal_estimator_ : estimator\n The regressor to stacked the base estimators fitted.\n\nReferences\n----------\n.. [1] Wolpert, David H. 
\"Stacked generalization.\" Neural networks 5.2\n (1992): 241-259.\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.linear_model import RidgeCV\n>>> from sklearn.svm import LinearSVR\n>>> from sklearn.ensemble import RandomForestRegressor\n>>> from sklearn.ensemble import StackingRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> estimators = [\n... ('lr', RidgeCV()),\n... ('svr', LinearSVR(random_state=42))\n... ]\n>>> reg = StackingRegressor(\n... estimators=estimators,\n... final_estimator=RandomForestRegressor(n_estimators=10,\n... random_state=42)\n... )\n>>> from sklearn.model_selection import train_test_split\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, random_state=42\n... )\n>>> reg.fit(X_train, y_train).score(X_test, y_test)\n0.3..." - } - ], - "functions": [] - }, - { - "name": "sklearn.ensemble._voting", - "imports": [ - "from abc import abstractmethod", - "import numpy as np", - "from joblib import Parallel", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import TransformerMixin", - "from base import clone", - "from _base import _fit_single_estimator", - "from _base import _BaseHeterogeneousEnsemble", - "from preprocessing import LabelEncoder", - "from utils import Bunch", - "from utils.validation import check_is_fitted", - "from utils.multiclass import check_classification_targets", - "from utils.validation import column_or_1d", - "from utils.validation import _deprecate_positional_args", - "from exceptions import NotFittedError", - "from utils._estimator_html_repr import _VisualBlock", - "from utils.fixes import delayed" - ], - "classes": [ - { - "name": "_BaseVoting", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_log_message", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_weights_not_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get the weights of not `None` estimators." - }, - { - "name": "_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Collect results from clf.predict calls." - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get common fit operations." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input samples" - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values (None for unsupervised transformations)." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional fit parameters." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Return class labels or probabilities for each estimator.\n\nReturn predictions for X for each estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix, dataframe} of shape (n_samples, n_features)\n Input samples\n\ny : ndarray of shape (n_samples,), default=None\n Target values (None for unsupervised transformations).\n\n**fit_params : dict\n Additional fit parameters.\n\nReturns\n-------\nX_new : ndarray array of shape (n_samples, n_features_new)\n Transformed array." 
- }, - { - "name": "n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sk_visual_block_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for voting.\n\nWarning: This class should not be used directly. Use derived classes\ninstead." - }, - { - "name": "VotingClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimators", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones of those original estimators that will be stored in the class attribute ``self.estimators_``. An estimator can be set to ``'drop'`` using ``set_params``. .. versionchanged:: 0.21 ``'drop'`` is accepted. Using None was deprecated in 0.22 and support was removed in 0.24." - }, - { - "name": "voting", - "type": "Literal['hard', 'soft']", - "hasDefault": true, - "default": "'hard'", - "limitation": null, - "ignored": false, - "docstring": "If 'hard', uses predicted class labels for majority rule voting. Else if 'soft', predicts the class label based on the argmax of the sums of the predicted probabilities, which is recommended for an ensemble of well-calibrated classifiers." - }, - { - "name": "weights", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sequence of weights (`float` or `int`) to weight the occurrences of predicted class labels (`hard` voting) or class probabilities before averaging (`soft` voting). Uses uniform weights if `None`." 
- }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel for ``fit``. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionadded:: 0.18" - }, - { - "name": "flatten_transform", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Affects shape of transform output only when voting='soft' If voting='soft' and flatten_transform=True, transform method returns matrix with shape (n_samples, n_classifiers * n_classes). If flatten_transform=False, it returns (n_classifiers, n_samples, n_classes)." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the time elapsed while fitting will be printed as it is completed. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Note that this is supported only if all underlying estimators support sample weights. .. 
versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nself : object" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class labels for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\nmaj : array-like of shape (n_samples,)\n Predicted class labels." - }, - { - "name": "_collect_probas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Collect results from clf.predict calls." - }, - { - "name": "_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X in 'soft' voting." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute probabilities of possible outcomes for samples in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\navg : array-like of shape (n_samples, n_classes)\n Weighted average probability for each class per sample." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Return class labels or probabilities for X for each estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\nReturns\n-------\nprobabilities_or_labels\n If `voting='soft'` and `flatten_transform=True`:\n returns ndarray of shape (n_classifiers, n_samples *\n n_classes), being class probabilities calculated by each\n classifier.\n If `voting='soft' and `flatten_transform=False`:\n ndarray of shape (n_classifiers, n_samples, n_classes)\n If `voting='hard'`:\n ndarray of shape (n_samples, n_classifiers), being\n class labels predicted by each classifier." - } - ], - "docstring": "Soft Voting/Majority Rule classifier for unfitted estimators.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nestimators : list of (str, estimator) tuples\n Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones\n of those original estimators that will be stored in the class attribute\n ``self.estimators_``. An estimator can be set to ``'drop'``\n using ``set_params``.\n\n .. versionchanged:: 0.21\n ``'drop'`` is accepted. 
Using None was deprecated in 0.22 and\n support was removed in 0.24.\n\nvoting : {'hard', 'soft'}, default='hard'\n If 'hard', uses predicted class labels for majority rule voting.\n Else if 'soft', predicts the class label based on the argmax of\n the sums of the predicted probabilities, which is recommended for\n an ensemble of well-calibrated classifiers.\n\nweights : array-like of shape (n_classifiers,), default=None\n Sequence of weights (`float` or `int`) to weight the occurrences of\n predicted class labels (`hard` voting) or class probabilities\n before averaging (`soft` voting). Uses uniform weights if `None`.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for ``fit``.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.18\n\nflatten_transform : bool, default=True\n Affects shape of transform output only when voting='soft'\n If voting='soft' and flatten_transform=True, transform method returns\n matrix with shape (n_samples, n_classifiers * n_classes). If\n flatten_transform=False, it returns\n (n_classifiers, n_samples, n_classes).\n\nverbose : bool, default=False\n If True, the time elapsed while fitting will be printed as it\n is completed.\n\n .. versionadded:: 0.23\n\nAttributes\n----------\nestimators_ : list of classifiers\n The collection of fitted sub-estimators as defined in ``estimators``\n that are not 'drop'.\n\nnamed_estimators_ : :class:`~sklearn.utils.Bunch`\n Attribute to access any fitted sub-estimators by name.\n\n .. 
versionadded:: 0.20\n\nclasses_ : array-like of shape (n_predictions,)\n The classes labels.\n\nSee Also\n--------\nVotingRegressor : Prediction voting regressor.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.naive_bayes import GaussianNB\n>>> from sklearn.ensemble import RandomForestClassifier, VotingClassifier\n>>> clf1 = LogisticRegression(multi_class='multinomial', random_state=1)\n>>> clf2 = RandomForestClassifier(n_estimators=50, random_state=1)\n>>> clf3 = GaussianNB()\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> eclf1 = VotingClassifier(estimators=[\n... ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')\n>>> eclf1 = eclf1.fit(X, y)\n>>> print(eclf1.predict(X))\n[1 1 1 2 2 2]\n>>> np.array_equal(eclf1.named_estimators_.lr.predict(X),\n... eclf1.named_estimators_['lr'].predict(X))\nTrue\n>>> eclf2 = VotingClassifier(estimators=[\n... ('lr', clf1), ('rf', clf2), ('gnb', clf3)],\n... voting='soft')\n>>> eclf2 = eclf2.fit(X, y)\n>>> print(eclf2.predict(X))\n[1 1 1 2 2 2]\n>>> eclf3 = VotingClassifier(estimators=[\n... ('lr', clf1), ('rf', clf2), ('gnb', clf3)],\n... voting='soft', weights=[2,1,1],\n... flatten_transform=True)\n>>> eclf3 = eclf3.fit(X, y)\n>>> print(eclf3.predict(X))\n[1 1 1 2 2 2]\n>>> print(eclf3.transform(X).shape)\n(6, 6)" - }, - { - "name": "VotingRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimators", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Invoking the ``fit`` method on the ``VotingRegressor`` will fit clones of those original estimators that will be stored in the class attribute ``self.estimators_``. An estimator can be set to ``'drop'`` using ``set_params``. .. 
versionchanged:: 0.21 ``'drop'`` is accepted. Using None was deprecated in 0.22 and support was removed in 0.24." - }, - { - "name": "weights", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sequence of weights (`float` or `int`) to weight the occurrences of predicted values before averaging. Uses uniform weights if `None`." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel for ``fit``. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the time elapsed while fitting will be printed as it is completed. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Note that this is supported only if all underlying estimators support sample weights." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\nReturns\n-------\nself : object\n Fitted estimator." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict regression target for X.\n\nThe predicted regression target of an input sample is computed as the\nmean predicted regression targets of the estimators in the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted values." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Return predictions for X for each estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\npredictions: ndarray of shape (n_samples, n_classifiers)\n Values predicted by each regressor." 
- } - ], - "docstring": "Prediction voting regressor for unfitted estimators.\n\nA voting regressor is an ensemble meta-estimator that fits several base\nregressors, each on the whole dataset. Then it averages the individual\npredictions to form a final prediction.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\nParameters\n----------\nestimators : list of (str, estimator) tuples\n Invoking the ``fit`` method on the ``VotingRegressor`` will fit clones\n of those original estimators that will be stored in the class attribute\n ``self.estimators_``. An estimator can be set to ``'drop'`` using\n ``set_params``.\n\n .. versionchanged:: 0.21\n ``'drop'`` is accepted. Using None was deprecated in 0.22 and\n support was removed in 0.24.\n\nweights : array-like of shape (n_regressors,), default=None\n Sequence of weights (`float` or `int`) to weight the occurrences of\n predicted values before averaging. Uses uniform weights if `None`.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for ``fit``.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting will be printed as it\n is completed.\n\n .. versionadded:: 0.23\n\nAttributes\n----------\nestimators_ : list of regressors\n The collection of fitted sub-estimators as defined in ``estimators``\n that are not 'drop'.\n\nnamed_estimators_ : Bunch\n Attribute to access any fitted sub-estimators by name.\n\n .. 
versionadded:: 0.20\n\nSee Also\n--------\nVotingClassifier : Soft Voting/Majority Rule classifier.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LinearRegression\n>>> from sklearn.ensemble import RandomForestRegressor\n>>> from sklearn.ensemble import VotingRegressor\n>>> r1 = LinearRegression()\n>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)\n>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])\n>>> y = np.array([2, 6, 12, 20, 30, 42])\n>>> er = VotingRegressor([('lr', r1), ('rf', r2)])\n>>> print(er.fit(X, y).predict(X))\n[ 3.3 5.7 11.8 19.7 28. 40.3]" - } - ], - "functions": [] - }, - { - "name": "sklearn.ensemble._weight_boosting", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numpy as np", - "from scipy.special import xlogy", - "from _base import BaseEnsemble", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import is_classifier", - "from base import is_regressor", - "from tree import DecisionTreeClassifier", - "from tree import DecisionTreeRegressor", - "from utils import check_array", - "from utils import check_random_state", - "from utils import _safe_indexing", - "from utils.extmath import softmax", - "from utils.extmath import stable_cumsum", - "from metrics import accuracy_score", - "from metrics import r2_score", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.validation import has_fit_parameter", - "from utils.validation import _num_samples", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "BaseWeightBoosting", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_X", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels in classification, real numbers in regression)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, the sample weights are initialized to 1 / n_samples." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a boosted classifier/regressor from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, the sample weights are initialized to\n 1 / n_samples.\n\nReturns\n-------\nself : object" - }, - { - "name": "_boost", - "decorators": [], - "parameters": [ - { - "name": "iboost", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index of the current boost iteration." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. 
Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The current sample weights." - }, - { - "name": "random_state", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The current random number generator" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Implement a single boost.\n\nWarning: This method needs to be overridden by subclasses.\n\nParameters\n----------\niboost : int\n The index of the current boost iteration.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n The target values (class labels).\n\nsample_weight : array-like of shape (n_samples,)\n The current sample weights.\n\nrandom_state : RandomState\n The current random number generator\n\nReturns\n-------\nsample_weight : array-like of shape (n_samples,) or None\n The reweighted sample weights.\n If None then boosting has terminated early.\n\nestimator_weight : float\n The weight for the current boost.\n If None then boosting has terminated early.\n\nerror : float\n The classification error for the current boost.\n If None then boosting has terminated early." - }, - { - "name": "staged_score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. 
COO, DOK, and LIL are converted to CSR." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Labels for X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return staged scores for X, y.\n\nThis generator method yields the ensemble score after each iteration of\nboosting and therefore allows monitoring, such as to determine the\nscore on a test set after each boost.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n Labels for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nYields\n------\nz : float" - }, - { - "name": "feature_importances_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The impurity-based feature importances.\n\nThe higher, the more important the feature.\nThe importance of a feature is computed as the (normalized)\ntotal reduction of the criterion brought by that feature. It is also\nknown as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). See\n:func:`sklearn.inspection.permutation_importance` as an alternative.\n\nReturns\n-------\nfeature_importances_ : ndarray of shape (n_features,)\n The feature importances." - } - ], - "docstring": "Base class for AdaBoost estimators.\n\nWarning: This class should not be used directly. Use derived classes\ninstead." 
- }, - { - "name": "AdaBoostClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base estimator from which the boosted ensemble is built. Support for sample weighting is required, as well as proper ``classes_`` and ``n_classes_`` attributes. If ``None``, then the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier` initialized with `max_depth=1`." - }, - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "50", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of estimators at which boosting is terminated. In case of perfect fit, the learning procedure is stopped early." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Learning rate shrinks the contribution of each classifier by ``learning_rate``. There is a trade-off between ``learning_rate`` and ``n_estimators``." - }, - { - "name": "algorithm", - "type": "Literal['SAMME', 'SAMME.R']", - "hasDefault": true, - "default": "'SAMME", - "limitation": null, - "ignored": false, - "docstring": "If 'SAMME.R' then use the SAMME.R real boosting algorithm. ``base_estimator`` must support calculation of class probabilities. If 'SAMME' then use the SAMME discrete boosting algorithm. The SAMME.R algorithm typically converges faster than SAMME, achieving a lower test error with fewer boosting iterations." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the random seed given at each `base_estimator` at each boosting iteration. Thus, it is only used when `base_estimator` exposes a `random_state`. 
Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, the sample weights are initialized to ``1 / n_samples``." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a boosted classifier from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n The target values (class labels).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, the sample weights are initialized to\n ``1 / n_samples``.\n\nReturns\n-------\nself : object\n Fitted estimator." - }, - { - "name": "_validate_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the estimator and set the base_estimator_ attribute." 
- }, - { - "name": "_boost", - "decorators": [], - "parameters": [ - { - "name": "iboost", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index of the current boost iteration." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The current sample weights." - }, - { - "name": "random_state", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The RandomState instance used if the base estimator accepts a `random_state` attribute." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Implement a single boost.\n\nPerform a single boost according to the real multi-class SAMME.R\nalgorithm or to the discrete SAMME algorithm and return the updated\nsample weights.\n\nParameters\n----------\niboost : int\n The index of the current boost iteration.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,)\n The target values (class labels).\n\nsample_weight : array-like of shape (n_samples,)\n The current sample weights.\n\nrandom_state : RandomState instance\n The RandomState instance used if the base estimator accepts a\n `random_state` attribute.\n\nReturns\n-------\nsample_weight : array-like of shape (n_samples,) or None\n The reweighted sample weights.\n If None then boosting has terminated early.\n\nestimator_weight : float\n The weight for the current boost.\n If None then boosting has terminated early.\n\nestimator_error : float\n The classification error for the current boost.\n If None then boosting has terminated early." - }, - { - "name": "_boost_real", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Implement a single boost using the SAMME.R real algorithm." - }, - { - "name": "_boost_discrete", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Implement a single boost using the SAMME discrete algorithm." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict classes for X.\n\nThe predicted class of an input sample is computed as the weighted mean\nprediction of the classifiers in the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted classes." - }, - { - "name": "staged_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return staged predictions for X.\n\nThe predicted class of an input sample is computed as the weighted mean\nprediction of the classifiers in the ensemble.\n\nThis generator method yields the ensemble prediction after each\niteration of boosting and therefore allows monitoring, such as to\ndetermine the prediction on a test set after each boost.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nYields\n------\ny : generator of ndarray of shape (n_samples,)\n The predicted classes." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the decision function of ``X``.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nReturns\n-------\nscore : ndarray of shape of (n_samples, k)\n The decision function of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute.\n Binary classification is a special cases with ``k == 1``,\n otherwise ``k==n_classes``. For binary classification,\n values closer to -1 or 1 mean more like the first or second\n class in ``classes_``, respectively." - }, - { - "name": "staged_decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute decision function of ``X`` for each boosting iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each boosting iteration.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nYields\n------\nscore : generator of ndarray of shape (n_samples, k)\n The decision function of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute.\n Binary classification is a special cases with ``k == 1``,\n otherwise ``k==n_classes``. For binary classification,\n values closer to -1 or 1 mean more like the first or second\n class in ``classes_``, respectively." 
- }, - { - "name": "_compute_proba_from_decision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute probabilities from the decision function.\n\nThis is based eq. (4) of [1] where:\n p(y=c|X) = exp((1 / K-1) f_c(X)) / sum_k(exp((1 / K-1) f_k(X)))\n = softmax((1 / K-1) * f(X))\n\nReferences\n----------\n.. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\",\n 2009." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample is computed as\nthe weighted mean predicted class probabilities of the classifiers\nin the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute." - }, - { - "name": "staged_predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample is computed as\nthe weighted mean predicted class probabilities of the classifiers\nin the ensemble.\n\nThis generator method yields the ensemble predicted class probabilities\nafter each iteration of boosting and therefore allows monitoring, such\nas to determine the predicted class probabilities on a test set after\neach boost.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nYields\n-------\np : generator of ndarray of shape (n_samples,)\n The class probabilities of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class log-probabilities for X.\n\nThe predicted class log-probabilities of an input sample is computed as\nthe weighted mean predicted class log-probabilities of the classifiers\nin the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute." 
- } - ], - "docstring": "An AdaBoost classifier.\n\nAn AdaBoost [1] classifier is a meta-estimator that begins by fitting a\nclassifier on the original dataset and then fits additional copies of the\nclassifier on the same dataset but where the weights of incorrectly\nclassified instances are adjusted such that subsequent classifiers focus\nmore on difficult cases.\n\nThis class implements the algorithm known as AdaBoost-SAMME [2].\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.14\n\nParameters\n----------\nbase_estimator : object, default=None\n The base estimator from which the boosted ensemble is built.\n Support for sample weighting is required, as well as proper\n ``classes_`` and ``n_classes_`` attributes. If ``None``, then\n the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`\n initialized with `max_depth=1`.\n\nn_estimators : int, default=50\n The maximum number of estimators at which boosting is terminated.\n In case of perfect fit, the learning procedure is stopped early.\n\nlearning_rate : float, default=1.\n Learning rate shrinks the contribution of each classifier by\n ``learning_rate``. 
There is a trade-off between ``learning_rate`` and\n ``n_estimators``.\n\nalgorithm : {'SAMME', 'SAMME.R'}, default='SAMME.R'\n If 'SAMME.R' then use the SAMME.R real boosting algorithm.\n ``base_estimator`` must support calculation of class probabilities.\n If 'SAMME' then use the SAMME discrete boosting algorithm.\n The SAMME.R algorithm typically converges faster than SAMME,\n achieving a lower test error with fewer boosting iterations.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random seed given at each `base_estimator` at each\n boosting iteration.\n Thus, it is only used when `base_estimator` exposes a `random_state`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nbase_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\nestimators_ : list of classifiers\n The collection of fitted sub-estimators.\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\nn_classes_ : int\n The number of classes.\n\nestimator_weights_ : ndarray of floats\n Weights for each estimator in the boosted ensemble.\n\nestimator_errors_ : ndarray of floats\n Classification error for each estimator in the boosted\n ensemble.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances if supported by the\n ``base_estimator`` (when based on decision trees).\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). 
See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nSee Also\n--------\nAdaBoostRegressor : An AdaBoost regressor that begins by fitting a\n regressor on the original dataset and then fits additional copies of\n the regressor on the same dataset but where the weights of instances\n are adjusted according to the error of the current prediction.\n\nGradientBoostingClassifier : GB builds an additive model in a forward\n stage-wise fashion. Regression trees are fit on the negative gradient\n of the binomial or multinomial deviance loss function. Binary\n classification is a special case where only a single regression tree is\n induced.\n\nsklearn.tree.DecisionTreeClassifier : A non-parametric supervised learning\n method used for classification.\n Creates a model that predicts the value of a target variable by\n learning simple decision rules inferred from the data features.\n\nReferences\n----------\n.. [1] Y. Freund, R. Schapire, \"A Decision-Theoretic Generalization of\n on-Line Learning and an Application to Boosting\", 1995.\n\n.. [2] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009.\n\nExamples\n--------\n>>> from sklearn.ensemble import AdaBoostClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=1000, n_features=4,\n... n_informative=2, n_redundant=0,\n... random_state=0, shuffle=False)\n>>> clf = AdaBoostClassifier(n_estimators=100, random_state=0)\n>>> clf.fit(X, y)\nAdaBoostClassifier(n_estimators=100, random_state=0)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])\n>>> clf.score(X, y)\n0.983..." - }, - { - "name": "AdaBoostRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base estimator from which the boosted ensemble is built. 
If ``None``, then the base estimator is :class:`~sklearn.tree.DecisionTreeRegressor` initialized with `max_depth=3`." - }, - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "50", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of estimators at which boosting is terminated. In case of perfect fit, the learning procedure is stopped early." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Learning rate shrinks the contribution of each regressor by ``learning_rate``. There is a trade-off between ``learning_rate`` and ``n_estimators``." - }, - { - "name": "loss", - "type": "Literal['linear', 'square', 'exponential']", - "hasDefault": true, - "default": "'linear'", - "limitation": null, - "ignored": false, - "docstring": "The loss function to use when updating the weights after each boosting iteration." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the random seed given at each `base_estimator` at each boosting iteration. Thus, it is only used when `base_estimator` exposes a `random_state`. In addition, it controls the bootstrap of the weights used to train the `base_estimator` at each boosting iteration. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (real numbers)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, the sample weights are initialized to 1 / n_samples." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a boosted regressor from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n The target values (real numbers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, the sample weights are initialized to\n 1 / n_samples.\n\nReturns\n-------\nself : object" - }, - { - "name": "_validate_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the estimator and set the base_estimator_ attribute." - }, - { - "name": "_boost", - "decorators": [], - "parameters": [ - { - "name": "iboost", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index of the current boost iteration." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels in classification, real numbers in regression)." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The current sample weights." - }, - { - "name": "random_state", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The RandomState instance used if the base estimator accepts a `random_state` attribute. Controls also the bootstrap of the weights used to train the weak learner. replacement." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Implement a single boost for regression\n\nPerform a single boost according to the AdaBoost.R2 algorithm and\nreturn the updated sample weights.\n\nParameters\n----------\niboost : int\n The index of the current boost iteration.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\nsample_weight : array-like of shape (n_samples,)\n The current sample weights.\n\nrandom_state : RandomState\n The RandomState instance used if the base estimator accepts a\n `random_state` attribute.\n Controls also the bootstrap of the weights used to train the weak\n learner.\n replacement.\n\nReturns\n-------\nsample_weight : array-like of shape (n_samples,) or None\n The reweighted sample weights.\n If None then boosting has terminated early.\n\nestimator_weight : float\n The weight for the current boost.\n If None then boosting has terminated early.\n\nestimator_error : float\n The regression error for the current boost.\n If None then boosting has terminated early." 
- }, - { - "name": "_get_median_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict regression value for X.\n\nThe predicted regression value of an input sample is computed\nas the weighted median prediction of the classifiers in the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted regression values." - }, - { - "name": "staged_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return staged predictions for X.\n\nThe predicted regression value of an input sample is computed\nas the weighted median prediction of the classifiers in the ensemble.\n\nThis generator method yields the ensemble prediction after each\niteration of boosting and therefore allows monitoring, such as to\ndetermine the prediction on a test set after each boost.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\nYields\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted regression values." 
- } - ], - "docstring": "An AdaBoost regressor.\n\nAn AdaBoost [1] regressor is a meta-estimator that begins by fitting a\nregressor on the original dataset and then fits additional copies of the\nregressor on the same dataset but where the weights of instances are\nadjusted according to the error of the current prediction. As such,\nsubsequent regressors focus more on difficult cases.\n\nThis class implements the algorithm known as AdaBoost.R2 [2].\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.14\n\nParameters\n----------\nbase_estimator : object, default=None\n The base estimator from which the boosted ensemble is built.\n If ``None``, then the base estimator is\n :class:`~sklearn.tree.DecisionTreeRegressor` initialized with\n `max_depth=3`.\n\nn_estimators : int, default=50\n The maximum number of estimators at which boosting is terminated.\n In case of perfect fit, the learning procedure is stopped early.\n\nlearning_rate : float, default=1.\n Learning rate shrinks the contribution of each regressor by\n ``learning_rate``. 
There is a trade-off between ``learning_rate`` and\n ``n_estimators``.\n\nloss : {'linear', 'square', 'exponential'}, default='linear'\n The loss function to use when updating the weights after each\n boosting iteration.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random seed given at each `base_estimator` at each\n boosting iteration.\n Thus, it is only used when `base_estimator` exposes a `random_state`.\n In addition, it controls the bootstrap of the weights used to train the\n `base_estimator` at each boosting iteration.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nbase_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\nestimators_ : list of classifiers\n The collection of fitted sub-estimators.\n\nestimator_weights_ : ndarray of floats\n Weights for each estimator in the boosted ensemble.\n\nestimator_errors_ : ndarray of floats\n Regression error for each estimator in the boosted ensemble.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances if supported by the\n ``base_estimator`` (when based on decision trees).\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nExamples\n--------\n>>> from sklearn.ensemble import AdaBoostRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=4, n_informative=2,\n... random_state=0, shuffle=False)\n>>> regr = AdaBoostRegressor(random_state=0, n_estimators=100)\n>>> regr.fit(X, y)\nAdaBoostRegressor(n_estimators=100, random_state=0)\n>>> regr.predict([[0, 0, 0, 0]])\narray([4.7972...])\n>>> regr.score(X, y)\n0.9771...\n\nSee Also\n--------\nAdaBoostClassifier, GradientBoostingRegressor,\nsklearn.tree.DecisionTreeRegressor\n\nReferences\n----------\n.. 
[1] Y. Freund, R. Schapire, \"A Decision-Theoretic Generalization of\n on-Line Learning and an Application to Boosting\", 1995.\n\n.. [2] H. Drucker, \"Improving Regressors using Boosting Techniques\", 1997." - } - ], - "functions": [ - { - "name": "_samme_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate algorithm 4, step 2, equation c) of Zhu et al [1].\n\nReferences\n----------\n.. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009." - } - ] - }, - { - "name": "sklearn.ensemble", - "imports": [ - "import typing", - "from _base import BaseEnsemble", - "from _forest import RandomForestClassifier", - "from _forest import RandomForestRegressor", - "from _forest import RandomTreesEmbedding", - "from _forest import ExtraTreesClassifier", - "from _forest import ExtraTreesRegressor", - "from _bagging import BaggingClassifier", - "from _bagging import BaggingRegressor", - "from _iforest import IsolationForest", - "from _weight_boosting import AdaBoostClassifier", - "from _weight_boosting import AdaBoostRegressor", - "from _gb import GradientBoostingClassifier", - "from _gb import GradientBoostingRegressor", - "from _voting import VotingClassifier", - "from _voting import VotingRegressor", - "from _stacking import StackingClassifier", - "from _stacking import StackingRegressor", - "from _hist_gradient_boosting.gradient_boosting import HistGradientBoostingRegressor", - "from _hist_gradient_boosting.gradient_boosting import HistGradientBoostingClassifier" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.ensemble.tests.test_bagging", - "imports": [ - "from itertools import product", - "import numpy as np", - "import joblib", - "import pytest", - "from sklearn.base import BaseEstimator", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raises", 
- "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.dummy import DummyClassifier", - "from sklearn.dummy import DummyRegressor", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.model_selection import ParameterGrid", - "from sklearn.ensemble import BaggingClassifier", - "from sklearn.ensemble import BaggingRegressor", - "from sklearn.linear_model import Perceptron", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.neighbors import KNeighborsClassifier", - "from sklearn.neighbors import KNeighborsRegressor", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.svm import SVC", - "from sklearn.svm import SVR", - "from sklearn.random_projection import SparseRandomProjection", - "from sklearn.pipeline import make_pipeline", - "from sklearn.feature_selection import SelectKBest", - "from sklearn.model_selection import train_test_split", - "from sklearn.datasets import load_diabetes", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_hastie_10_2", - "from sklearn.utils import check_random_state", - "from sklearn.preprocessing import FunctionTransformer", - "from sklearn.preprocessing import scale", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix" - ], - "classes": [ - { - "name": "DummySizeEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "DummyZeroEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": 
"__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bootstrap_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bootstrap_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_probability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_score_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_score_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_single_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parallel_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parallel_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bagging_with_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bagging_sample_weight_unsupported_but_passed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_smaller_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_equal_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_equivalence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_with_oob_score_fails", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_score_removed_on_warm_start", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_score_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimators_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimators_samples_deterministic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_samples_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_oob_score_label_encoding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "replace", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bagging_regressor_with_missing_inputs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bagging_classifier_with_missing_inputs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bagging_small_max_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bagging_get_estimators_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_base", - "imports": [ - "import numpy as np", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.datasets import load_iris", - "from sklearn.ensemble import BaggingClassifier", - "from sklearn.ensemble._base import 
_set_random_states", - "from sklearn.linear_model import Perceptron", - "from collections import OrderedDict", - "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis", - "from sklearn.pipeline import Pipeline", - "from sklearn.feature_selection import SelectFromModel" - ], - "classes": [], - "functions": [ - { - "name": "test_base", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_zero_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_not_int_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_random_states", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_common", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.base import clone", - "from sklearn.base import ClassifierMixin", - "from sklearn.base import is_classifier", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import load_diabetes", - "from sklearn.impute import SimpleImputer", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import LinearRegression", - "from sklearn.svm import LinearSVC", - "from sklearn.svm import LinearSVR", - "from sklearn.svm import SVC", - "from sklearn.svm import SVR", - "from sklearn.pipeline import make_pipeline", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.ensemble import RandomForestRegressor", - "from sklearn.ensemble import StackingClassifier", - "from sklearn.ensemble import StackingRegressor", - "from sklearn.ensemble import VotingClassifier", 
- "from sklearn.ensemble import VotingRegressor" - ], - "classes": [], - "functions": [ - { - "name": "test_ensemble_heterogeneous_estimators_behavior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ensemble_heterogeneous_estimators_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ensemble_heterogeneous_estimators_name_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ensemble_heterogeneous_estimators_all_dropped", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_heterogeneous_ensemble_support_missing_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_forest", - "imports": [ - "import pickle", - "import math", - "from collections import defaultdict", - "import itertools", - "from itertools import combinations", - "from itertools import product", - "from typing import Dict", - "from typing import Any", - "import numpy as np", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import coo_matrix", - "from scipy.special import comb", - "import pytest", - "import joblib", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import skip_if_no_parallel", - "from 
sklearn.utils.fixes import parse_version", - "from sklearn.exceptions import NotFittedError", - "from sklearn import datasets", - "from sklearn.decomposition import TruncatedSVD", - "from sklearn.datasets import make_classification", - "from sklearn.ensemble import ExtraTreesClassifier", - "from sklearn.ensemble import ExtraTreesRegressor", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.ensemble import RandomForestRegressor", - "from sklearn.ensemble import RandomTreesEmbedding", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.svm import LinearSVC", - "from sklearn.utils.validation import check_random_state", - "from sklearn.tree._classes import SPARSE_SPLITTERS" - ], - "classes": [ - { - "name": "MyBackend", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "start_call", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "check_classification_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check classification on a toy dataset." 
- }, - { - "name": "test_classification_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_iris_criterion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_regression_criterion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_regressor_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_probability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_probability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_importances_asymptotic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unfitted_feature_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "check_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_score_classifiers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_score_regressors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_oob_score_raise_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_score_raise_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check parallel computations in classification" - }, - { - "name": "test_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_multioutput_string", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_classes_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classes_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_trees_dense_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_trees_dense_equal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_hasher", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_hasher_sparse_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parallel_train", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_distribution", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_max_leaf_nodes_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_leaf_nodes_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_min_samples_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_samples_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "check_min_samples_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_samples_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_min_weight_fraction_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_weight_fraction_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_memory_layout", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_memory_layout", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_1d_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_1d_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_class_weight_balanced_and_bootstrap_multi_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_class_weight_balanced_and_bootstrap_multi_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_class_weight_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_weight_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_warm_start_clear", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_clear", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_warm_start_smaller_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_smaller_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_warm_start_equal_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_equal_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_warm_start_oob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_oob", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dtype_convert", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_decision_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decision_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_impurity_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_impurity_decrease", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_backend_respected", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_forest_feature_importances_sum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_forest_degenerate_feature_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_samples_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_forest_y_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_little_tree_with_small_max_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_gradient_boosting", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy.sparse import 
csr_matrix", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import coo_matrix", - "from scipy.special import expit", - "import pytest", - "from sklearn import datasets", - "from sklearn.base import clone", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.ensemble import GradientBoostingClassifier", - "from sklearn.ensemble import GradientBoostingRegressor", - "from sklearn.ensemble._gradient_boosting import predict_stages", - "from sklearn.preprocessing import OneHotEncoder", - "from sklearn.preprocessing import scale", - "from sklearn.svm import LinearSVC", - "from sklearn.metrics import mean_squared_error", - "from sklearn.model_selection import train_test_split", - "from sklearn.utils import check_random_state", - "from sklearn.utils import tosequence", - "from sklearn.utils._mocking import NoSampleWeightWrapper", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import skip_if_32bit", - "from sklearn.exceptions import DataConversionWarning", - "from sklearn.exceptions import NotFittedError", - "from sklearn.dummy import DummyClassifier", - "from sklearn.dummy import DummyRegressor", - "from sklearn.pipeline import make_pipeline", - "from sklearn.linear_model import LinearRegression", - "from sklearn.svm import NuSVR", - "import cPickle as pickle", - "import pickle", - "from io import StringIO", - "import sys", - "from sklearn.tree._tree import TREE_LEAF" - ], - "classes": [], - "functions": [ - { - "name": "test_classification_toy", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gbdt_parameter_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gbdt_loss_alpha_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_type_loss_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_synthetic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_synthetic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_probability_log", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_single_class_with_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_inputs_predict_stages", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_feature_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_feature_importance_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that Gini importance is calculated correctly.\n\nThis test follows the example from [1]_ (pg. 373).\n\n.. [1] Friedman, J., Hastie, T., & Tibshirani, R. (2001). The elements\n of statistical learning. New York: Springer series in statistics." - }, - { - "name": "test_max_feature_auto", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_staged_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_staged_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_staged_functions_defensive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_serialization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_degenerate_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_symbol_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_float_class_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shape_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mem_layout", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_improvement", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_improvement_raise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_multilcass_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_verbose_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_more_verbose_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_clear", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_zero_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_smaller_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_equal_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_warm_start_oob_switch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_oob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_fortran", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "early_stopping_monitor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns True on the 10th iteration. " - }, - { - "name": "test_monitor_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_complete_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_complete_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_estimator_reg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_estimator_clf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_leaf_nodes_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_impurity_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_impurity_decrease", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_wo_nestimators_change", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_probability_exponential", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_uniform_weights_toy_edge_case_reg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_uniform_weights_toy_edge_case_clf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_boosting_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_boosting_validation_fraction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping_stratified", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_boosting_with_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_boosting_with_init_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_boosting_init_wrong_methods", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping_n_classes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gbr_degenerate_feature_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gbr_deprecated_attr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_attr_error_raised_if_not_fitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_criterion_mae_deprecation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_gradient_boosting_loss_functions", - "imports": [ - "from itertools import product", - "import numpy as np", - "from numpy.testing import assert_allclose", - "import pytest", - "from pytest import approx", - "from sklearn.utils import check_random_state", - "from sklearn.ensemble._gb_losses import RegressionLossFunction", - "from sklearn.ensemble._gb_losses import LeastSquaresError", - "from sklearn.ensemble._gb_losses import LeastAbsoluteError", - "from sklearn.ensemble._gb_losses import HuberLossFunction", - "from sklearn.ensemble._gb_losses import QuantileLossFunction", - "from sklearn.ensemble._gb_losses import BinomialDeviance", - "from sklearn.ensemble._gb_losses import MultinomialDeviance", - "from sklearn.ensemble._gb_losses import ExponentialLoss", - "from sklearn.ensemble._gb_losses import LOSS_FUNCTIONS" - ], - "classes": [], - "functions": [ - { - "name": "test_binomial_deviance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight_smoke", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight_init_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_loss_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight_deviance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_deviance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mdl_computation_weighted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mdl_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_raw_predictions_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_raw_predictions_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lad_equals_quantile_50", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_iforest", - "imports": [ - "import pytest", - "import numpy as np", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import 
assert_allclose", - "from sklearn.model_selection import ParameterGrid", - "from sklearn.ensemble import IsolationForest", - "from sklearn.ensemble._iforest import _average_path_length", - "from sklearn.model_selection import train_test_split", - "from sklearn.datasets import load_diabetes", - "from sklearn.datasets import load_iris", - "from sklearn.utils import check_random_state", - "from sklearn.metrics import roc_auc_score", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from unittest.mock import Mock", - "from unittest.mock import patch" - ], - "classes": [], - "functions": [ - { - "name": "test_iforest", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check Isolation Forest for various parameter settings." - }, - { - "name": "test_iforest_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check IForest for various parameter settings on sparse input." - }, - { - "name": "test_iforest_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that it gives proper exception on deficient input." - }, - { - "name": "test_recalculate_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check max_depth recalculation when max_samples is reset to n_samples" - }, - { - "name": "test_max_samples_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iforest_parallel_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check parallel regression." 
- }, - { - "name": "test_iforest_performance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test Isolation Forest performs well" - }, - { - "name": "test_iforest_works", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_samples_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iforest_subsampled_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iforest_average_path_length", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iforest_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test iterative addition of iTrees to an iForest " - }, - { - "name": "test_iforest_chunks_works1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iforest_chunks_works2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iforest_with_uniform_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test whether iforest predicts inliers when using uniform data" - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_stacking", - "imports": [ - "import pytest", - "import numpy as np", - "import scipy.sparse as sparse", - "from sklearn.base import BaseEstimator", - "from sklearn.base import ClassifierMixin", - "from sklearn.base import 
RegressorMixin", - "from sklearn.base import clone", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import load_diabetes", - "from sklearn.datasets import load_breast_cancer", - "from sklearn.datasets import make_regression", - "from sklearn.datasets import make_classification", - "from sklearn.dummy import DummyClassifier", - "from sklearn.dummy import DummyRegressor", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import LinearRegression", - "from sklearn.svm import LinearSVC", - "from sklearn.svm import LinearSVR", - "from sklearn.svm import SVC", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.ensemble import RandomForestRegressor", - "from sklearn.preprocessing import scale", - "from sklearn.ensemble import StackingClassifier", - "from sklearn.ensemble import StackingRegressor", - "from sklearn.model_selection import train_test_split", - "from sklearn.model_selection import StratifiedKFold", - "from sklearn.model_selection import KFold", - "from sklearn.utils._mocking import CheckingClassifier", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import ignore_warnings" - ], - "classes": [ - { - "name": "NoWeightRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoWeightClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": 
"__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_stacking_classifier_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_classifier_drop_column_binary_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_classifier_drop_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_regressor_drop_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_regressor_diabetes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_regressor_sparse_passthrough", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_classifier_sparse_passthrough", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_classifier_drop_binary_prob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_classifier_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_regressor_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_stacking_randomness", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_classifier_stratify_default", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_with_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_classifier_sample_weight_fit_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_cv_influence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_without_n_features_in", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_voting", - "imports": [ - "import warnings", - "import pytest", - "import re", - "import numpy as np", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.exceptions import NotFittedError", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.naive_bayes import GaussianNB", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.ensemble import RandomForestRegressor", - "from sklearn.ensemble import VotingClassifier", - "from sklearn.ensemble import VotingRegressor", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.model_selection import GridSearchCV", - "from 
sklearn import datasets", - "from sklearn.model_selection import cross_val_score", - "from sklearn.model_selection import train_test_split", - "from sklearn.datasets import make_multilabel_classification", - "from sklearn.svm import SVC", - "from sklearn.multiclass import OneVsRestClassifier", - "from sklearn.neighbors import KNeighborsClassifier", - "from sklearn.base import BaseEstimator", - "from sklearn.base import ClassifierMixin", - "from sklearn.base import clone", - "from sklearn.dummy import DummyRegressor" - ], - "classes": [], - "functions": [ - { - "name": "test_voting_classifier_estimator_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predictproba_hardvoting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_notfitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_majority_label_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check classification by majority label on dataset iris." - }, - { - "name": "test_tie_situation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check voting classifier selects smaller class label in tie situation." - }, - { - "name": "test_weights_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check classification by average probabilities on dataset iris." - }, - { - "name": "test_weights_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check weighted average regression prediction on diabetes dataset." 
- }, - { - "name": "test_predict_on_toy_problem", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Manually check predicted class labels for toy dataset." - }, - { - "name": "test_predict_proba_on_toy_problem", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate predicted probabilities on toy dataset." - }, - { - "name": "test_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if error is raised for multilabel classification." - }, - { - "name": "test_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check GridSearch support." - }, - { - "name": "test_parallel_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check parallel backend of VotingClassifier on toy dataset." - }, - { - "name": "test_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Tests sample_weight parameter of VotingClassifier" - }, - { - "name": "test_sample_weight_kwargs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that VotingClassifier passes sample_weight as kwargs" - }, - { - "name": "test_voting_classifier_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_estimator_drop", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimator_weights_format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": "Check transform method of VotingClassifier on toy dataset." - }, - { - "name": "test_none_estimator_with_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_voting_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_weight_boosting", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import coo_matrix", - "from scipy.sparse import dok_matrix", - "from scipy.sparse import lil_matrix", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_less", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regexp", - "from sklearn.base import BaseEstimator", - "from sklearn.base import clone", - "from sklearn.dummy import DummyClassifier", - "from sklearn.dummy import DummyRegressor", - "from sklearn.linear_model import LinearRegression", - "from sklearn.model_selection import train_test_split", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.ensemble import AdaBoostClassifier", - "from sklearn.ensemble import AdaBoostRegressor", - "from sklearn.ensemble._weight_boosting import _samme_proba", - "from sklearn.svm import SVC", - "from sklearn.svm import SVR", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.utils import shuffle", - "from sklearn.utils._mocking import NoSampleWeightWrapper", - "from sklearn 
import datasets", - "import pickle", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.ensemble import RandomForestRegressor" - ], - "classes": [], - "functions": [ - { - "name": "test_samme_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oneclass_adaboost_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_diabetes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_staged_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_sparse_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight_adaboost_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "AdaBoostRegressor should work without sample_weights in the base estimator\nThe random weighted sampling is done internally in the _boost method in\nAdaBoostRegressor." - }, - { - "name": "test_multidimensional_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that the AdaBoost estimators can work with n-dimensional\ndata matrix" - }, - { - "name": "test_adaboostclassifier_without_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_adaboostregressor_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_adaboost_consistent_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_adaboost_negative_weight_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.binning", - "imports": [ - "import numpy as np", - "from utils import check_random_state", - "from utils import check_array", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils.validation import check_is_fitted", - "from _binning import _map_to_bins", - "from 
common import X_DTYPE", - "from common import X_BINNED_DTYPE", - "from common import ALMOST_INF", - "from common import X_BITSET_INNER_DTYPE", - "from _bitset import set_bitset_memoryview" - ], - "classes": [ - { - "name": "_BinMapper", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_bins", - "type": "int", - "hasDefault": true, - "default": "256", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of bins to use (including the bin for missing values). Should be in [3, 256]. Non-missing values are binned on ``max_bins = n_bins - 1`` bins. The last bin is always reserved for missing values. If for a given feature the number of unique values is less than ``max_bins``, then those unique values will be used to compute the bin thresholds, instead of the quantiles. For categorical features indicated by ``is_categorical``, the docstring for ``is_categorical`` details on this procedure." - }, - { - "name": "subsample", - "type": "Optional[int]", - "hasDefault": true, - "default": "2e5", - "limitation": null, - "ignored": false, - "docstring": "If ``n_samples > subsample``, then ``sub_samples`` samples will be randomly chosen to compute the quantiles. If ``None``, the whole data is used." - }, - { - "name": "is_categorical", - "type": "NDArray[bool]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates categorical features. By default, all features are considered continuous." - }, - { - "name": "known_categories", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "For each categorical feature, the array indicates the set of unique categorical values. These should be the possible values over all the data, not just the training data. For continuous features, the corresponding entry should be None." 
- }, - { - "name": "random_state: int", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the random sub-sampling. Pass an int for reproducible output across multiple function calls. See :term: `Glossary `." - }, - { - "name": "RandomState instance or None", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the random sub-sampling. Pass an int for reproducible output across multiple function calls. See :term: `Glossary `." - }, - { - "name": "default=None", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the random sub-sampling. Pass an int for reproducible output across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to bin." - }, - { - "name": "y: None", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit data X by computing the binning thresholds.\n\nThe last bin is reserved for missing values, whether missing values\nare present in the data or not.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data to bin.\ny: None\n Ignored.\n\nReturns\n-------\nself : object" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to bin." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Bin data X.\n\nMissing values will be mapped to the last bin.\n\nFor categorical features, the mapping will be incorrect for unknown\ncategories. Since the BinMapper is given known_categories of the\nentire training data (i.e. before the call to train_test_split() in\ncase of early-stopping), this never happens.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data to bin.\n\nReturns\n-------\nX_binned : array-like of shape (n_samples, n_features)\n The binned data (fortran-aligned)." - }, - { - "name": "make_known_categories_bitsets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Create bitsets of known categories.\n\nReturns\n-------\n- known_cat_bitsets : ndarray of shape (n_categorical_features, 8)\n Array of bitsets of known categories, for each categorical feature.\n- f_idx_map : ndarray of shape (n_features,)\n Map from original feature index to the corresponding index in the\n known_cat_bitsets array." - } - ], - "docstring": "Transformer that maps a dataset into integer-valued bins.\n\nFor continuous features, the bins are created in a feature-wise fashion,\nusing quantiles so that each bins contains approximately the same number\nof samples. 
For large datasets, quantiles are computed on a subset of the\ndata to speed-up the binning, but the quantiles should remain stable.\n\nFor categorical features, the raw categorical values are expected to be\nin [0, 254] (this is not validated here though) and each category\ncorresponds to a bin. All categorical values must be known at\ninitialization: transform() doesn't know how to bin unknown categorical\nvalues. Note that transform() is only used on non-training data in the\ncase of early stopping.\n\nFeatures with a small number of values may be binned into less than\n``n_bins`` bins. The last bin (at index ``n_bins - 1``) is always reserved\nfor missing values.\n\nParameters\n----------\nn_bins : int, default=256\n The maximum number of bins to use (including the bin for missing\n values). Should be in [3, 256]. Non-missing values are binned on\n ``max_bins = n_bins - 1`` bins. The last bin is always reserved for\n missing values. If for a given feature the number of unique values is\n less than ``max_bins``, then those unique values will be used to\n compute the bin thresholds, instead of the quantiles. For categorical\n features indicated by ``is_categorical``, the docstring for\n ``is_categorical`` details on this procedure.\nsubsample : int or None, default=2e5\n If ``n_samples > subsample``, then ``sub_samples`` samples will be\n randomly chosen to compute the quantiles. If ``None``, the whole data\n is used.\nis_categorical : ndarray of bool of shape (n_features,), default=None\n Indicates categorical features. By default, all features are\n considered continuous.\nknown_categories : list of {ndarray, None} of shape (n_features,), default=none\n For each categorical feature, the array indicates the set of unique\n categorical values. These should be the possible values over all the\n data, not just the training data. 
For continuous features, the\n corresponding entry should be None.\nrandom_state: int, RandomState instance or None, default=None\n Pseudo-random number generator to control the random sub-sampling.\n Pass an int for reproducible output across multiple\n function calls.\n See :term: `Glossary `.\n\nAttributes\n----------\nbin_thresholds_ : list of ndarray\n For each feature, each array indicates how to map a feature into a\n binned feature. The semantic and size depends on the nature of the\n feature:\n - for real-valued features, the array corresponds to the real-valued\n bin thresholds (the upper bound of each bin). There are ``max_bins\n - 1`` thresholds, where ``max_bins = n_bins - 1`` is the number of\n bins used for non-missing values.\n - for categorical features, the array is a map from a binned category\n value to the raw category value. The size of the array is equal to\n ``min(max_bins, category_cardinality)`` where we ignore missing\n values in the cardinality.\nn_bins_non_missing_ : ndarray, dtype=np.uint32\n For each feature, gives the number of bins actually used for\n non-missing values. For features with a lot of unique values, this is\n equal to ``n_bins - 1``.\nis_categorical_ : ndarray of shape (n_features,), dtype=np.uint8\n Indicator for categorical features.\nmissing_values_bin_idx_ : np.uint8\n The index of the bin where missing values are mapped. This is a\n constant across all features. This corresponds to the last bin, and\n it is always equal to ``n_bins - 1``. Note that if ``n_bins_missing_``\n is less than ``n_bins - 1`` for a given feature, then there are\n empty (and unused) bins." 
- } - ], - "functions": [ - { - "name": "_find_binning_thresholds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Extract quantiles from a continuous feature.\n\nMissing values are ignored for finding the thresholds.\n\nParameters\n----------\ncol_data : array-like, shape (n_samples,)\n The continuous feature to bin.\nmax_bins: int\n The maximum number of bins to use for non-missing values. If for a\n given feature the number of unique values is less than ``max_bins``,\n then those unique values will be used to compute the bin thresholds,\n instead of the quantiles\n\nReturn\n------\nbinning_thresholds : ndarray of shape(min(max_bins, n_unique_values) - 1,)\n The increasing numeric values that can be used to separate the bins.\n A given value x will be mapped into bin value i iff\n bining_thresholds[i - 1] < x <= binning_thresholds[i]" - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting", - "imports": [ - "from abc import ABC", - "from abc import abstractmethod", - "from functools import partial", - "import numpy as np", - "from timeit import default_timer as time", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from base import ClassifierMixin", - "from base import is_classifier", - "from utils import check_random_state", - "from utils import check_array", - "from utils import resample", - "from utils.validation import check_is_fitted", - "from utils.validation import check_consistent_length", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import check_classification_targets", - "from metrics import check_scoring", - "from model_selection import train_test_split", - "from preprocessing import LabelEncoder", - "from _gradient_boosting import _update_raw_predictions", - "from common import Y_DTYPE", - "from common import X_DTYPE", - "from common import 
X_BINNED_DTYPE", - "from binning import _BinMapper", - "from grower import TreeGrower", - "from loss import _LOSSES", - "from loss import BaseLoss" - ], - "classes": [ - { - "name": "BaseHistGradientBoosting", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate parameters passed to __init__.\n\nThe parameters that are directly passed to the grower are checked in\nTreeGrower." - }, - { - "name": "_check_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check and validate categorical features in X\n\nReturn\n------\nis_categorical : ndarray of shape (n_features,) or None, dtype=bool\n Indicates whether a feature is categorical. If no feature is\n categorical, this is None.\nknown_categories : list of size n_features or None\n The list contains, for each feature:\n - an array of shape (n_categories,) with the unique cat values\n - None if the feature is not categorical\n None if no feature is categorical." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights of training data. .. 
versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the gradient boosting model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,) default=None\n Weights of training data.\n\n .. versionadded:: 0.23\n\nReturns\n-------\nself : object" - }, - { - "name": "_is_fitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_clear_state", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Clear the state of the gradient boosting model." - }, - { - "name": "_get_small_trainset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the indices of the subsample set and return this set.\n\nFor efficiency, we need to subsample the training set to compute scores\nwith scorers." - }, - { - "name": "_check_early_stopping_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if fitting should be early-stopped based on scorer.\n\nScores are computed on validation data or on training data." - }, - { - "name": "_check_early_stopping_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if fitting should be early-stopped based on loss.\n\nScores are computed on validation data or on training data." - }, - { - "name": "_should_stop", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return True (do early stopping) if the last n scores aren't better\nthan the (n-1)th-to-last score, up to some tolerance." 
- }, - { - "name": "_bin_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Bin data X.\n\nIf is_training_data, then fit the _bin_mapper attribute.\nElse, the binned data is converted to a C-contiguous array." - }, - { - "name": "_print_iteration_stats", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Print info about the current fitting iteration." - }, - { - "name": "_raw_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the sum of the leaves values over all predictors.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\nraw_predictions : array, shape (n_trees_per_iteration, n_samples)\n The raw predicted values." - }, - { - "name": "_predict_iterations", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Add the predictions of the predictors to raw_predictions." - }, - { - "name": "_staged_raw_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute raw predictions of ``X`` for each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nYields\n-------\nraw_predictions : generator of ndarray of shape (n_trees_per_iteration, n_samples)\n The raw predictions of the input samples. 
The order of the\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "_compute_partial_dependence_recursion", - "decorators": [], - "parameters": [ - { - "name": "grid", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The grid points on which the partial dependence should be evaluated." - }, - { - "name": "target_features", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of target features for which the partial dependence should be evaluated." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fast partial dependence computation.\n\nParameters\n----------\ngrid : ndarray, shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\ntarget_features : ndarray, shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\n\nReturns\n-------\naveraged_predictions : ndarray, shape (n_trees_per_iteration, n_samples)\n The value of the partial dependence function on each grid point." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_encode_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "n_iter_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for histogram-based gradient boosting estimators." 
- }, - { - "name": "HistGradientBoostingRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "loss", - "type": "Literal['least_squares', 'least_absolute_deviation', 'poisson']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The loss function to use in the boosting process. Note that the \"least squares\" and \"poisson\" losses actually implement \"half least squares loss\" and \"half poisson deviance\" to simplify the computation of the gradient. Furthermore, \"poisson\" loss internally uses a log-link and requires ``y >= 0`` .. versionchanged:: 0.23 Added option 'poisson'." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The learning rate, also known as *shrinkage*. This is used as a multiplicative factor for the leaves values. Use ``1`` for no shrinkage." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations of the boosting process, i.e. the maximum number of trees." - }, - { - "name": "max_leaf_nodes", - "type": "Optional[int]", - "hasDefault": true, - "default": "31", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of leaves for each tree. Must be strictly greater than 1. If None, there is no maximum limit." - }, - { - "name": "max_depth", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of each tree. The depth of a tree is the number of edges to go from the root to the deepest leaf. Depth isn't constrained by default." 
- }, - { - "name": "min_samples_leaf", - "type": "int", - "hasDefault": true, - "default": "20", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples per leaf. For small datasets with less than a few hundred samples, it is recommended to lower this value since only very shallow trees would be built." - }, - { - "name": "l2_regularization", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The L2 regularization parameter. Use ``0`` for no regularization (default)." - }, - { - "name": "max_bins", - "type": "int", - "hasDefault": true, - "default": "255", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of bins to use for non-missing values. Before training, each feature of the input array `X` is binned into integer-valued bins, which allows for a much faster training stage. Features with a small number of unique values may use less than ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin is always reserved for missing values. Must be no larger than 255." - }, - { - "name": "monotonic_cst", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates the monotonic constraint to enforce on each feature. -1, 1 and 0 respectively correspond to a negative constraint, positive constraint and no constraint. Read more in the :ref:`User Guide `. .. versionadded:: 0.23" - }, - { - "name": "categorical_features", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates the categorical features. - None : no feature will be considered categorical. - boolean array-like : boolean mask indicating categorical features. - integer array-like : integer indices indicating categorical features. 
For each categorical feature, there must be at most `max_bins` unique categories, and each categorical value must be in [0, max_bins -1]. Read more in the :ref:`User Guide `. .. versionadded:: 0.24" - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble. For results to be valid, the estimator should be re-trained on the same data only. See :term:`the Glossary `." - }, - { - "name": "early_stopping", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If 'auto', early stopping is enabled if the sample size is larger than 10000. If True, early stopping is enabled, otherwise early stopping is disabled. .. versionadded:: 0.23" - }, - { - "name": "scoring", - "type": "Optional[Union[Callable, str]]", - "hasDefault": true, - "default": "'loss'", - "limitation": null, - "ignored": false, - "docstring": "Scoring parameter to use for early stopping. It can be a single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`). If None, the estimator's default scorer is used. If ``scoring='loss'``, early stopping is checked w.r.t the loss value. Only used if early stopping is performed." - }, - { - "name": "validation_fraction", - "type": "Optional[Union[float, int]]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Proportion (or absolute size) of training data to set aside as validation data for early stopping. If None, early stopping is done on the training data. Only used if early stopping is performed." - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Used to determine when to \"early stop\". 
The fitting process is stopped when none of the last ``n_iter_no_change`` scores are better than the ``n_iter_no_change - 1`` -th-to-last one, up to some tolerance. Only used if early stopping is performed." - }, - { - "name": "tol", - "type": "Optional[float]", - "hasDefault": true, - "default": "1e-7", - "limitation": null, - "ignored": false, - "docstring": "The absolute tolerance to use when comparing scores during early stopping. The higher the tolerance, the more likely we are to early stop: higher tolerance means that it will be harder for subsequent iterations to be considered an improvement upon the reference score." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level. If not zero, print some information about the fitting process." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the subsampling in the binning process, and the train/validation data split if early stopping is enabled. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict values for X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\ny : ndarray, shape (n_samples,)\n The predicted values." 
- }, - { - "name": "staged_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict regression target for each iteration\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nYields\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted values of the input samples, for each iteration." - }, - { - "name": "_encode_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Histogram-based Gradient Boosting Regression Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingRegressor`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM `_.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. 
To use it,\n you need to explicitly import ``enable_hist_gradient_boosting``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n >>> # now you can import normally from ensemble\n >>> from sklearn.ensemble import HistGradientBoostingRegressor\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\nParameters\n----------\nloss : {'least_squares', 'least_absolute_deviation', 'poisson'}, default='least_squares'\n The loss function to use in the boosting process. Note that the\n \"least squares\" and \"poisson\" losses actually implement\n \"half least squares loss\" and \"half poisson deviance\" to simplify the\n computation of the gradient. Furthermore, \"poisson\" loss internally\n uses a log-link and requires ``y >= 0``\n\n .. versionchanged:: 0.23\n Added option 'poisson'.\n\nlearning_rate : float, default=0.1\n The learning rate, also known as *shrinkage*. This is used as a\n multiplicative factor for the leaves values. Use ``1`` for no\n shrinkage.\nmax_iter : int, default=100\n The maximum number of iterations of the boosting process, i.e. the\n maximum number of trees.\nmax_leaf_nodes : int or None, default=31\n The maximum number of leaves for each tree. Must be strictly greater\n than 1. If None, there is no maximum limit.\nmax_depth : int or None, default=None\n The maximum depth of each tree. The depth of a tree is the number of\n edges to go from the root to the deepest leaf.\n Depth isn't constrained by default.\nmin_samples_leaf : int, default=20\n The minimum number of samples per leaf. For small datasets with less\n than a few hundred samples, it is recommended to lower this value\n since only very shallow trees would be built.\nl2_regularization : float, default=0\n The L2 regularization parameter. Use ``0`` for no regularization\n (default).\nmax_bins : int, default=255\n The maximum number of bins to use for non-missing values. 
Before\n training, each feature of the input array `X` is binned into\n integer-valued bins, which allows for a much faster training stage.\n Features with a small number of unique values may use less than\n ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\n is always reserved for missing values. Must be no larger than 255.\nmonotonic_cst : array-like of int of shape (n_features), default=None\n Indicates the monotonic constraint to enforce on each feature. -1, 1\n and 0 respectively correspond to a negative constraint, positive\n constraint and no constraint. Read more in the :ref:`User Guide\n `.\n\n .. versionadded:: 0.23\n\ncategorical_features : array-like of {bool, int} of shape (n_features) or shape (n_categorical_features,), default=None.\n Indicates the categorical features.\n\n - None : no feature will be considered categorical.\n - boolean array-like : boolean mask indicating categorical features.\n - integer array-like : integer indices indicating categorical\n features.\n\n For each categorical feature, there must be at most `max_bins` unique\n categories, and each categorical value must be in [0, max_bins -1].\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.24\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble. For results to be valid, the\n estimator should be re-trained on the same data only.\n See :term:`the Glossary `.\nearly_stopping : 'auto' or bool, default='auto'\n If 'auto', early stopping is enabled if the sample size is larger than\n 10000. If True, early stopping is enabled, otherwise early stopping is\n disabled.\n\n .. versionadded:: 0.23\n\nscoring : str or callable or None, default='loss'\n Scoring parameter to use for early stopping. It can be a single\n string (see :ref:`scoring_parameter`) or a callable (see\n :ref:`scoring`). If None, the estimator's default scorer is used. 
If\n ``scoring='loss'``, early stopping is checked w.r.t the loss value.\n Only used if early stopping is performed.\nvalidation_fraction : int or float or None, default=0.1\n Proportion (or absolute size) of training data to set aside as\n validation data for early stopping. If None, early stopping is done on\n the training data. Only used if early stopping is performed.\nn_iter_no_change : int, default=10\n Used to determine when to \"early stop\". The fitting process is\n stopped when none of the last ``n_iter_no_change`` scores are better\n than the ``n_iter_no_change - 1`` -th-to-last one, up to some\n tolerance. Only used if early stopping is performed.\ntol : float or None, default=1e-7\n The absolute tolerance to use when comparing scores during early\n stopping. The higher the tolerance, the more likely we are to early\n stop: higher tolerance means that it will be harder for subsequent\n iterations to be considered an improvement upon the reference score.\nverbose : int, default=0\n The verbosity level. If not zero, print some information about the\n fitting process.\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the subsampling in the\n binning process, and the train/validation data split if early stopping\n is enabled.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ndo_early_stopping_ : bool\n Indicates whether early stopping is used during training.\nn_iter_ : int\n The number of iterations as selected by early stopping, depending on\n the `early_stopping` parameter. Otherwise it corresponds to max_iter.\nn_trees_per_iteration_ : int\n The number of tree that are built at each iteration. For regressors,\n this is always 1.\ntrain_score_ : ndarray, shape (n_iter_+1,)\n The scores at each iteration on the training data. The first entry\n is the score of the ensemble before the first iteration. 
Scores are\n computed according to the ``scoring`` parameter. If ``scoring`` is\n not 'loss', scores are computed on a subset of at most 10 000\n samples. Empty if no early stopping.\nvalidation_score_ : ndarray, shape (n_iter_+1,)\n The scores at each iteration on the held-out validation data. The\n first entry is the score of the ensemble before the first iteration.\n Scores are computed according to the ``scoring`` parameter. Empty if\n no early stopping or if ``validation_fraction`` is None.\nis_categorical_ : ndarray, shape (n_features, ) or None\n Boolean mask for the categorical features. ``None`` if there are no\n categorical features.\n\nExamples\n--------\n>>> # To use this experimental feature, we need to explicitly ask for it:\n>>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n>>> from sklearn.ensemble import HistGradientBoostingRegressor\n>>> from sklearn.datasets import load_diabetes\n>>> X, y = load_diabetes(return_X_y=True)\n>>> est = HistGradientBoostingRegressor().fit(X, y)\n>>> est.score(X, y)\n0.92..." - }, - { - "name": "HistGradientBoostingClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "loss", - "type": "Literal['auto', 'binary_crossentropy', 'categorical_crossentropy']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The loss function to use in the boosting process. 'binary_crossentropy' (also known as logistic loss) is used for binary classification and generalizes to 'categorical_crossentropy' for multiclass classification. 'auto' will automatically choose either loss depending on the nature of the problem." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The learning rate, also known as *shrinkage*. 
This is used as a multiplicative factor for the leaves values. Use ``1`` for no shrinkage." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations of the boosting process, i.e. the maximum number of trees for binary classification. For multiclass classification, `n_classes` trees per iteration are built." - }, - { - "name": "max_leaf_nodes", - "type": "Optional[int]", - "hasDefault": true, - "default": "31", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of leaves for each tree. Must be strictly greater than 1. If None, there is no maximum limit." - }, - { - "name": "max_depth", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of each tree. The depth of a tree is the number of edges to go from the root to the deepest leaf. Depth isn't constrained by default." - }, - { - "name": "min_samples_leaf", - "type": "int", - "hasDefault": true, - "default": "20", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples per leaf. For small datasets with less than a few hundred samples, it is recommended to lower this value since only very shallow trees would be built." - }, - { - "name": "l2_regularization", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The L2 regularization parameter. Use 0 for no regularization." - }, - { - "name": "max_bins", - "type": "int", - "hasDefault": true, - "default": "255", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of bins to use for non-missing values. Before training, each feature of the input array `X` is binned into integer-valued bins, which allows for a much faster training stage. 
Features with a small number of unique values may use less than ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin is always reserved for missing values. Must be no larger than 255." - }, - { - "name": "monotonic_cst", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates the monotonic constraint to enforce on each feature. -1, 1 and 0 respectively correspond to a negative constraint, positive constraint and no constraint. Read more in the :ref:`User Guide `. .. versionadded:: 0.23" - }, - { - "name": "categorical_features", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates the categorical features. - None : no feature will be considered categorical. - boolean array-like : boolean mask indicating categorical features. - integer array-like : integer indices indicating categorical features. For each categorical feature, there must be at most `max_bins` unique categories, and each categorical value must be in [0, max_bins -1]. Read more in the :ref:`User Guide `. .. versionadded:: 0.24" - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble. For results to be valid, the estimator should be re-trained on the same data only. See :term:`the Glossary `." - }, - { - "name": "early_stopping", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If 'auto', early stopping is enabled if the sample size is larger than 10000. If True, early stopping is enabled, otherwise early stopping is disabled. .. 
versionadded:: 0.23" - }, - { - "name": "scoring", - "type": "Optional[Union[Callable, str]]", - "hasDefault": true, - "default": "'loss'", - "limitation": null, - "ignored": false, - "docstring": "Scoring parameter to use for early stopping. It can be a single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`). If None, the estimator's default scorer is used. If ``scoring='loss'``, early stopping is checked w.r.t the loss value. Only used if early stopping is performed." - }, - { - "name": "validation_fraction", - "type": "Optional[Union[float, int]]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Proportion (or absolute size) of training data to set aside as validation data for early stopping. If None, early stopping is done on the training data. Only used if early stopping is performed." - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Used to determine when to \"early stop\". The fitting process is stopped when none of the last ``n_iter_no_change`` scores are better than the ``n_iter_no_change - 1`` -th-to-last one, up to some tolerance. Only used if early stopping is performed." - }, - { - "name": "tol", - "type": "Optional[float]", - "hasDefault": true, - "default": "1e-7", - "limitation": null, - "ignored": false, - "docstring": "The absolute tolerance to use when comparing scores. The higher the tolerance, the more likely we are to early stop: higher tolerance means that it will be harder for subsequent iterations to be considered an improvement upon the reference score." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level. If not zero, print some information about the fitting process." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the subsampling in the binning process, and the train/validation data split if early stopping is enabled. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict classes for X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\ny : ndarray, shape (n_samples,)\n The predicted classes." - }, - { - "name": "staged_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict classes at each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nYields\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted classes of the input samples, for each iteration." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\np : ndarray, shape (n_samples, n_classes)\n The class probabilities of the input samples." - }, - { - "name": "staged_predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities at each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nYields\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted class probabilities of the input samples,\n for each iteration." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the decision function of ``X``.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\ndecision : ndarray, shape (n_samples,) or (n_samples, n_trees_per_iteration)\n The raw predicted values (i.e. the sum of the trees leaves) for\n each sample. n_trees_per_iteration is equal to the number of\n classes in multiclass classification." - }, - { - "name": "staged_decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute decision function of ``X`` for each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nYields\n-------\ndecision : generator of ndarray of shape (n_samples,) or (n_samples, n_trees_per_iteration)\n The decision function of the input samples, which corresponds to\n the raw values predicted from the trees of the ensemble . The\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "_encode_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Histogram-based Gradient Boosting Classification Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingClassifier`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM `_.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. 
To use it,\n you need to explicitly import ``enable_hist_gradient_boosting``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n >>> # now you can import normally from ensemble\n >>> from sklearn.ensemble import HistGradientBoostingClassifier\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\nParameters\n----------\nloss : {'auto', 'binary_crossentropy', 'categorical_crossentropy'}, default='auto'\n The loss function to use in the boosting process. 'binary_crossentropy'\n (also known as logistic loss) is used for binary classification and\n generalizes to 'categorical_crossentropy' for multiclass\n classification. 'auto' will automatically choose either loss depending\n on the nature of the problem.\nlearning_rate : float, default=0.1\n The learning rate, also known as *shrinkage*. This is used as a\n multiplicative factor for the leaves values. Use ``1`` for no\n shrinkage.\nmax_iter : int, default=100\n The maximum number of iterations of the boosting process, i.e. the\n maximum number of trees for binary classification. For multiclass\n classification, `n_classes` trees per iteration are built.\nmax_leaf_nodes : int or None, default=31\n The maximum number of leaves for each tree. Must be strictly greater\n than 1. If None, there is no maximum limit.\nmax_depth : int or None, default=None\n The maximum depth of each tree. The depth of a tree is the number of\n edges to go from the root to the deepest leaf.\n Depth isn't constrained by default.\nmin_samples_leaf : int, default=20\n The minimum number of samples per leaf. For small datasets with less\n than a few hundred samples, it is recommended to lower this value\n since only very shallow trees would be built.\nl2_regularization : float, default=0\n The L2 regularization parameter. Use 0 for no regularization.\nmax_bins : int, default=255\n The maximum number of bins to use for non-missing values. 
Before\n training, each feature of the input array `X` is binned into\n integer-valued bins, which allows for a much faster training stage.\n Features with a small number of unique values may use less than\n ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\n is always reserved for missing values. Must be no larger than 255.\nmonotonic_cst : array-like of int of shape (n_features), default=None\n Indicates the monotonic constraint to enforce on each feature. -1, 1\n and 0 respectively correspond to a negative constraint, positive\n constraint and no constraint. Read more in the :ref:`User Guide\n `.\n\n .. versionadded:: 0.23\n\ncategorical_features : array-like of {bool, int} of shape (n_features) or shape (n_categorical_features,), default=None.\n Indicates the categorical features.\n\n - None : no feature will be considered categorical.\n - boolean array-like : boolean mask indicating categorical features.\n - integer array-like : integer indices indicating categorical\n features.\n\n For each categorical feature, there must be at most `max_bins` unique\n categories, and each categorical value must be in [0, max_bins -1].\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.24\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble. For results to be valid, the\n estimator should be re-trained on the same data only.\n See :term:`the Glossary `.\nearly_stopping : 'auto' or bool, default='auto'\n If 'auto', early stopping is enabled if the sample size is larger than\n 10000. If True, early stopping is enabled, otherwise early stopping is\n disabled.\n\n .. versionadded:: 0.23\n\nscoring : str or callable or None, default='loss'\n Scoring parameter to use for early stopping. It can be a single\n string (see :ref:`scoring_parameter`) or a callable (see\n :ref:`scoring`). If None, the estimator's default scorer\n is used. 
If ``scoring='loss'``, early stopping is checked\n w.r.t the loss value. Only used if early stopping is performed.\nvalidation_fraction : int or float or None, default=0.1\n Proportion (or absolute size) of training data to set aside as\n validation data for early stopping. If None, early stopping is done on\n the training data. Only used if early stopping is performed.\nn_iter_no_change : int, default=10\n Used to determine when to \"early stop\". The fitting process is\n stopped when none of the last ``n_iter_no_change`` scores are better\n than the ``n_iter_no_change - 1`` -th-to-last one, up to some\n tolerance. Only used if early stopping is performed.\ntol : float or None, default=1e-7\n The absolute tolerance to use when comparing scores. The higher the\n tolerance, the more likely we are to early stop: higher tolerance\n means that it will be harder for subsequent iterations to be\n considered an improvement upon the reference score.\nverbose : int, default=0\n The verbosity level. If not zero, print some information about the\n fitting process.\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the subsampling in the\n binning process, and the train/validation data split if early stopping\n is enabled.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nclasses_ : array, shape = (n_classes,)\n Class labels.\ndo_early_stopping_ : bool\n Indicates whether early stopping is used during training.\nn_iter_ : int\n The number of iterations as selected by early stopping, depending on\n the `early_stopping` parameter. Otherwise it corresponds to max_iter.\nn_trees_per_iteration_ : int\n The number of tree that are built at each iteration. This is equal to 1\n for binary classification, and to ``n_classes`` for multiclass\n classification.\ntrain_score_ : ndarray, shape (n_iter_+1,)\n The scores at each iteration on the training data. 
The first entry\n is the score of the ensemble before the first iteration. Scores are\n computed according to the ``scoring`` parameter. If ``scoring`` is\n not 'loss', scores are computed on a subset of at most 10 000\n samples. Empty if no early stopping.\nvalidation_score_ : ndarray, shape (n_iter_+1,)\n The scores at each iteration on the held-out validation data. The\n first entry is the score of the ensemble before the first iteration.\n Scores are computed according to the ``scoring`` parameter. Empty if\n no early stopping or if ``validation_fraction`` is None.\nis_categorical_ : ndarray, shape (n_features, ) or None\n Boolean mask for the categorical features. ``None`` if there are no\n categorical features.\n\nExamples\n--------\n>>> # To use this experimental feature, we need to explicitly ask for it:\n>>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n>>> from sklearn.ensemble import HistGradientBoostingClassifier\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = HistGradientBoostingClassifier().fit(X, y)\n>>> clf.score(X, y)\n1.0" - } - ], - "functions": [] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.grower", - "imports": [ - "from heapq import heappush", - "from heapq import heappop", - "import numpy as np", - "from timeit import default_timer as time", - "import numbers", - "from splitting import Splitter", - "from histogram import HistogramBuilder", - "from predictor import TreePredictor", - "from utils import sum_parallel", - "from common import PREDICTOR_RECORD_DTYPE", - "from common import X_BITSET_INNER_DTYPE", - "from common import Y_DTYPE", - "from common import MonotonicConstraint", - "from _bitset import set_raw_bitset_from_binned_bitset" - ], - "classes": [ - { - "name": "TreeNode", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "depth", - "type": "int", - "hasDefault": 
false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The depth of the node, i.e. its distance from the root." - }, - { - "name": "sample_indices", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The indices of the samples at the node." - }, - { - "name": "sum_gradients", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sum of the gradients of the samples at the node." - }, - { - "name": "sum_hessians", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sum of the hessians of the samples at the node." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_children_bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set children values bounds to respect monotonic constraints." - }, - { - "name": "__lt__", - "decorators": [], - "parameters": [ - { - "name": "other_node", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The node to compare with." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Comparison for priority queue.\n\nNodes with high gain are higher priority than nodes with low gain.\n\nheapq.heappush only need the '<' operator.\nheapq.heappop take the smallest item first (smaller is higher\npriority).\n\nParameters\n----------\nother_node : TreeNode\n The node to compare with." - } - ], - "docstring": "Tree Node class used in TreeGrower.\n\nThis isn't used for prediction purposes, only for training (see\nTreePredictor).\n\nParameters\n----------\ndepth : int\n The depth of the node, i.e. 
its distance from the root.\nsample_indices : ndarray of shape (n_samples_at_node,), dtype=np.uint\n The indices of the samples at the node.\nsum_gradients : float\n The sum of the gradients of the samples at the node.\nsum_hessians : float\n The sum of the hessians of the samples at the node.\n\nAttributes\n----------\ndepth : int\n The depth of the node, i.e. its distance from the root.\nsample_indices : ndarray of shape (n_samples_at_node,), dtype=np.uint\n The indices of the samples at the node.\nsum_gradients : float\n The sum of the gradients of the samples at the node.\nsum_hessians : float\n The sum of the hessians of the samples at the node.\nsplit_info : SplitInfo or None\n The result of the split evaluation.\nleft_child : TreeNode or None\n The left child of the node. None for leaves.\nright_child : TreeNode or None\n The right child of the node. None for leaves.\nvalue : float or None\n The value of the leaf, as computed in finalize_leaf(). None for\n non-leaf nodes.\npartition_start : int\n start position of the node's sample_indices in splitter.partition.\npartition_stop : int\n stop position of the node's sample_indices in splitter.partition." - }, - { - "name": "TreeGrower", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "X_binned", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The binned input samples. Must be Fortran-aligned." - }, - { - "name": "gradients", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The gradients of each training sample. Those are the gradients of the loss w.r.t the predictions, evaluated at iteration ``i - 1``." 
- }, - { - "name": "hessians", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The hessians of each training sample. Those are the hessians of the loss w.r.t the predictions, evaluated at iteration ``i - 1``." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of leaves for each tree. If None, there is no maximum limit." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of each tree. The depth of a tree is the number of edges to go from the root to the deepest leaf. Depth isn't constrained by default." - }, - { - "name": "min_samples_leaf", - "type": "int", - "hasDefault": true, - "default": "20", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples per leaf." - }, - { - "name": "min_gain_to_split", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum gain needed to split a node. Splits with lower gain will be ignored." - }, - { - "name": "n_bins", - "type": "int", - "hasDefault": true, - "default": "256", - "limitation": null, - "ignored": false, - "docstring": "The total number of bins, including the bin for missing values. Used to define the shape of the histograms." - }, - { - "name": "n_bins_non_missing", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "For each feature, gives the number of bins actually used for non-missing values. For features with a lot of unique values, this is equal to ``n_bins - 1``. If it's an int, all features are considered to have the same number of bins. If None, all features are considered to have ``n_bins - 1`` bins." 
- }, - { - "name": "has_missing_values", - "type": "Union[NDArray, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether each feature contains missing values (in the training data). If it's a bool, the same value is used for all features." - }, - { - "name": "is_categorical", - "type": "NDArray[bool]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates categorical features." - }, - { - "name": "monotonic_cst", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates the monotonic constraint to enforce on each feature. -1, 1 and 0 respectively correspond to a positive constraint, negative constraint and no constraint. Read more in the :ref:`User Guide `." - }, - { - "name": "l2_regularization", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The L2 regularization parameter." - }, - { - "name": "min_hessian_to_split", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "The minimum sum of hessians needed in each node. Splits that result in at least one child having a sum of hessians less than ``min_hessian_to_split`` are discarded." - }, - { - "name": "shrinkage", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The shrinkage parameter to apply to the leaves values, also known as learning rate." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate parameters passed to __init__.\n\nAlso validate parameters passed to splitter." 
- }, - { - "name": "grow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Grow the tree, from root to leaves." - }, - { - "name": "_apply_shrinkage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Multiply leaves values by shrinkage parameter.\n\nThis must be done at the very end of the growing process. If this were\ndone during the growing process e.g. in finalize_leaf(), then a leaf\nwould be shrunk but its sibling would potentially not be (if it's a\nnon-leaf), which would lead to a wrong computation of the 'middle'\nvalue needed to enforce the monotonic constraints." - }, - { - "name": "_intilialize_root", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialize root node and finalize it if needed." - }, - { - "name": "_compute_best_split_and_push", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the best possible split (SplitInfo) of a given node.\n\nAlso push it in the heap of splittable nodes if gain isn't zero.\nThe gain of a node is 0 if either all the leaves are pure\n(best gain = 0), or if no split would satisfy the constraints,\n(min_hessians_to_split, min_gain_to_split, min_samples_leaf)" - }, - { - "name": "split_next", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Split the node with highest potential gain.\n\nReturns\n-------\nleft : TreeNode\n The resulting left child.\nright : TreeNode\n The resulting right child." - }, - { - "name": "_finalize_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make node a leaf of the tree being grown." 
- }, - { - "name": "_finalize_splittable_nodes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform all splittable nodes into leaves.\n\nUsed when some constraint is met e.g. maximum number of leaves or\nmaximum depth." - }, - { - "name": "make_predictor", - "decorators": [], - "parameters": [ - { - "name": "binning_thresholds", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Corresponds to the bin_thresholds_ attribute of the BinMapper. For each feature, this stores: - the bin frontiers for continuous features - the unique raw category values for categorical features" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Make a TreePredictor object out of the current tree.\n\nParameters\n----------\nbinning_thresholds : array-like of floats\n Corresponds to the bin_thresholds_ attribute of the BinMapper.\n For each feature, this stores:\n\n - the bin frontiers for continuous features\n - the unique raw category values for categorical features\n\nReturns\n-------\nA TreePredictor object." - } - ], - "docstring": "Tree grower class used to build a tree.\n\nThe tree is fitted to predict the values of a Newton-Raphson step. The\nsplits are considered in a best-first fashion, and the quality of a\nsplit is defined in splitting._split_gain.\n\nParameters\n----------\nX_binned : ndarray of shape (n_samples, n_features), dtype=np.uint8\n The binned input samples. Must be Fortran-aligned.\ngradients : ndarray of shape (n_samples,)\n The gradients of each training sample. Those are the gradients of the\n loss w.r.t the predictions, evaluated at iteration ``i - 1``.\nhessians : ndarray of shape (n_samples,)\n The hessians of each training sample. 
Those are the hessians of the\n loss w.r.t the predictions, evaluated at iteration ``i - 1``.\nmax_leaf_nodes : int, default=None\n The maximum number of leaves for each tree. If None, there is no\n maximum limit.\nmax_depth : int, default=None\n The maximum depth of each tree. The depth of a tree is the number of\n edges to go from the root to the deepest leaf.\n Depth isn't constrained by default.\nmin_samples_leaf : int, default=20\n The minimum number of samples per leaf.\nmin_gain_to_split : float, default=0.\n The minimum gain needed to split a node. Splits with lower gain will\n be ignored.\nn_bins : int, default=256\n The total number of bins, including the bin for missing values. Used\n to define the shape of the histograms.\nn_bins_non_missing : ndarray, dtype=np.uint32, default=None\n For each feature, gives the number of bins actually used for\n non-missing values. For features with a lot of unique values, this\n is equal to ``n_bins - 1``. If it's an int, all features are\n considered to have the same number of bins. If None, all features\n are considered to have ``n_bins - 1`` bins.\nhas_missing_values : bool or ndarray, dtype=bool, default=False\n Whether each feature contains missing values (in the training data).\n If it's a bool, the same value is used for all features.\nis_categorical : ndarray of bool of shape (n_features,), default=None\n Indicates categorical features.\nmonotonic_cst : array-like of shape (n_features,), dtype=int, default=None\n Indicates the monotonic constraint to enforce on each feature. -1, 1\n and 0 respectively correspond to a positive constraint, negative\n constraint and no constraint. Read more in the :ref:`User Guide\n `.\nl2_regularization : float, default=0.\n The L2 regularization parameter.\nmin_hessian_to_split : float, default=1e-3\n The minimum sum of hessians needed in each node. 
Splits that result in\n at least one child having a sum of hessians less than\n ``min_hessian_to_split`` are discarded.\nshrinkage : float, default=1.\n The shrinkage parameter to apply to the leaves values, also known as\n learning rate." - } - ], - "functions": [ - { - "name": "_fill_predictor_arrays", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper used in make_predictor to set the TreePredictor fields." - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.loss", - "imports": [ - "from abc import ABC", - "from abc import abstractmethod", - "import numpy as np", - "from scipy.special import expit", - "from scipy.special import logsumexp", - "from scipy.special import xlogy", - "from common import Y_DTYPE", - "from common import G_H_DTYPE", - "from _loss import _update_gradients_least_squares", - "from _loss import _update_gradients_hessians_least_squares", - "from _loss import _update_gradients_least_absolute_deviation", - "from _loss import _update_gradients_hessians_least_absolute_deviation", - "from _loss import _update_gradients_hessians_binary_crossentropy", - "from _loss import _update_gradients_hessians_categorical_crossentropy", - "from _loss import _update_gradients_hessians_poisson", - "from utils.stats import _weighted_percentile" - ], - "classes": [ - { - "name": "BaseLoss", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the weighted average loss" - }, - { - "name": "pointwise_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return loss value for each input" - }, - { - "name": "init_gradients_and_hessians", - "decorators": [], - 
"parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of samples passed to `fit()`." - }, - { - "name": "prediction_dim", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dimension of a raw prediction, i.e. the number of trees built at each iteration. Equals 1 for regression and binary classification, or K where K is the number of classes for multiclass classification." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights of training data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return initial gradients and hessians.\n\nUnless hessians are constant, arrays are initialized with undefined\nvalues.\n\nParameters\n----------\nn_samples : int\n The number of samples passed to `fit()`.\n\nprediction_dim : int\n The dimension of a raw prediction, i.e. the number of trees\n built at each iteration. Equals 1 for regression and binary\n classification, or K where K is the number of classes for\n multiclass classification.\n\nsample_weight : array-like of shape(n_samples,) default=None\n Weights of training data.\n\nReturns\n-------\ngradients : ndarray, shape (prediction_dim, n_samples)\n The initial gradients. The array is not initialized.\nhessians : ndarray, shape (prediction_dim, n_samples)\n If hessians are constant (e.g. for `LeastSquares` loss, the\n array is initialized to ``1``. Otherwise, the array is allocated\n without being initialized." - }, - { - "name": "get_baseline_prediction", - "decorators": [], - "parameters": [ - { - "name": "y_train", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target training values." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights of training data." - }, - { - "name": "prediction_dim", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dimension of one prediction: 1 for binary classification and regression, n_classes for multiclass classification." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return initial predictions (before the first iteration).\n\nParameters\n----------\ny_train : ndarray, shape (n_samples,)\n The target training values.\n\nsample_weight : array-like of shape(n_samples,) default=None\n Weights of training data.\n\nprediction_dim : int\n The dimension of one prediction: 1 for binary classification and\n regression, n_classes for multiclass classification.\n\nReturns\n-------\nbaseline_prediction : float or ndarray, shape (1, prediction_dim)\n The baseline prediction." - }, - { - "name": "update_gradients_and_hessians", - "decorators": [], - "parameters": [ - { - "name": "gradients", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The gradients (treated as OUT array)." - }, - { - "name": "hessians", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The hessians (treated as OUT array)." - }, - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The true target values or each training sample." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw_predictions (i.e. values from the trees) of the tree ensemble at iteration ``i - 1``." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights of training data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update gradients and hessians arrays, inplace.\n\nThe gradients (resp. hessians) are the first (resp. second) order\nderivatives of the loss for each sample with respect to the\npredictions of model, evaluated at iteration ``i - 1``.\n\nParameters\n----------\ngradients : ndarray, shape (prediction_dim, n_samples)\n The gradients (treated as OUT array).\n\nhessians : ndarray, shape (prediction_dim, n_samples) or (1,)\n The hessians (treated as OUT array).\n\ny_true : ndarray, shape (n_samples,)\n The true target values or each training sample.\n\nraw_predictions : ndarray, shape (prediction_dim, n_samples)\n The raw_predictions (i.e. values from the trees) of the tree\n ensemble at iteration ``i - 1``.\n\nsample_weight : array-like of shape(n_samples,) default=None\n Weights of training data." - } - ], - "docstring": "Base class for a loss." 
- }, - { - "name": "LeastSquares", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "pointwise_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_baseline_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_link_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update_gradients_and_hessians", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Least squares loss, for regression.\n\nFor a given sample x_i, least squares loss is defined as::\n\n loss(x_i) = 0.5 * (y_true_i - raw_pred_i)**2\n\nThis actually computes the half least squares loss to simplify\nthe computation of the gradients and get a unit hessian (and be consistent\nwith what is done in LightGBM)." 
- }, - { - "name": "LeastAbsoluteDeviation", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "pointwise_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_baseline_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_link_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update_gradients_and_hessians", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update_leaves_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Least absolute deviation, for regression.\n\nFor a given sample x_i, the loss is defined as::\n\n loss(x_i) = |y_true_i - raw_pred_i|" - }, - { - "name": "Poisson", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "pointwise_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_baseline_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update_gradients_and_hessians", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Poisson deviance loss with log-link, for regression.\n\nFor a given sample x_i, Poisson deviance loss is defined as::\n\n 
loss(x_i) = y_true_i * log(y_true_i/exp(raw_pred_i))\n - y_true_i + exp(raw_pred_i))\n\nThis actually computes half the Poisson deviance to simplify\nthe computation of the gradients." - }, - { - "name": "BinaryCrossEntropy", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "pointwise_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_baseline_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update_gradients_and_hessians", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Binary cross-entropy loss, for binary classification.\n\nFor a given sample x_i, the binary cross-entropy loss is defined as the\nnegative log-likelihood of the model which can be expressed as::\n\n loss(x_i) = log(1 + exp(raw_pred_i)) - y_true_i * raw_pred_i\n\nSee The Elements of Statistical Learning, by Hastie, Tibshirani, Friedman,\nsection 4.4.1 (about logistic regression)." 
- }, - { - "name": "CategoricalCrossEntropy", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "pointwise_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_baseline_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update_gradients_and_hessians", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Categorical cross-entropy loss, for multiclass classification.\n\nFor a given sample x_i, the categorical cross-entropy loss is defined as\nthe negative log-likelihood of the model and generalizes the binary\ncross-entropy to more than 2 classes." - } - ], - "functions": [] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.predictor", - "imports": [ - "import numpy as np", - "from common import Y_DTYPE", - "from _predictor import _predict_from_raw_data", - "from _predictor import _predict_from_binned_data", - "from _predictor import _compute_partial_dependence" - ], - "classes": [ - { - "name": "TreePredictor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "nodes", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The nodes of the tree." 
- }, - { - "name": "binned_left_cat_bitsets", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of bitsets for binned categories used in predict_binned when a split is categorical." - }, - { - "name": "raw_left_cat_bitsets", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of bitsets for raw categories used in predict when a split is categorical." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_n_leaf_nodes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return number of leaves." - }, - { - "name": "get_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return maximum depth among all leaves." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - }, - { - "name": "known_cat_bitsets", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of bitsets of known categories, for each categorical feature." - }, - { - "name": "f_idx_map", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Map from original feature index to the corresponding index in the known_cat_bitsets array." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict raw values for non-binned data.\n\nParameters\n----------\nX : ndarray, shape (n_samples, n_features)\n The input samples.\n\nknown_cat_bitsets : ndarray of shape (n_categorical_features, 8)\n Array of bitsets of known categories, for each categorical feature.\n\nf_idx_map : ndarray of shape (n_features,)\n Map from original feature index to the corresponding index in the\n known_cat_bitsets array.\n\nReturns\n-------\ny : ndarray, shape (n_samples,)\n The raw predicted values." - }, - { - "name": "predict_binned", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - }, - { - "name": "missing_values_bin_idx", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the bin that is used for missing values. This is the index of the last bin and is always equal to max_bins (as passed to the GBDT classes), or equivalently to n_bins - 1." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict raw values for binned data.\n\nParameters\n----------\nX : ndarray, shape (n_samples, n_features)\n The input samples.\nmissing_values_bin_idx : uint8\n Index of the bin that is used for missing values. This is the\n index of the last bin and is always equal to max_bins (as passed\n to the GBDT classes), or equivalently to n_bins - 1.\n\nReturns\n-------\ny : ndarray, shape (n_samples,)\n The raw predicted values." - }, - { - "name": "compute_partial_dependence", - "decorators": [], - "parameters": [ - { - "name": "grid", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The grid points on which the partial dependence should be evaluated." 
- }, - { - "name": "target_features", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of target features for which the partial dependence should be evaluated." - }, - { - "name": "out", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The value of the partial dependence function on each grid point." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fast partial dependence computation.\n\nParameters\n----------\ngrid : ndarray, shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\ntarget_features : ndarray, shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\nout : ndarray, shape (n_samples)\n The value of the partial dependence function on each grid\n point." - } - ], - "docstring": "Tree class used for predictions.\n\nParameters\n----------\nnodes : ndarray of PREDICTOR_RECORD_DTYPE\n The nodes of the tree.\nbinned_left_cat_bitsets : ndarray of shape (n_categorical_splits, 8), dtype=uint32\n Array of bitsets for binned categories used in predict_binned when a\n split is categorical.\nraw_left_cat_bitsets : ndarray of shape (n_categorical_splits, 8), dtype=uint32\n Array of bitsets for raw categories used in predict when a split is\n categorical." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_binning", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_allclose", - "import pytest", - "from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper", - "from sklearn.ensemble._hist_gradient_boosting.binning import _find_binning_thresholds", - "from sklearn.ensemble._hist_gradient_boosting.binning import _map_to_bins", - "from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import ALMOST_INF" - ], - "classes": [], - "functions": [ - { - "name": "test_find_binning_thresholds_regular_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_find_binning_thresholds_small_regular_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_find_binning_thresholds_random_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_find_binning_thresholds_low_n_bins", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_n_bins", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bin_mapper_n_features_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_map_to_bins", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bin_mapper_random_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bin_mapper_small_random_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bin_mapper_identity_repeated_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bin_mapper_repeated_values_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bin_mapper_identity_small", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bin_mapper_idempotence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_bins_non_missing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_subsample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_values_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_infinite_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categorical_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categorical_with_numerical_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_make_known_categories_bitsets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categorical_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_bitset", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_allclose", - "from sklearn.ensemble._hist_gradient_boosting._bitset import set_bitset_memoryview", - "from sklearn.ensemble._hist_gradient_boosting._bitset import in_bitset_memoryview", - "from sklearn.ensemble._hist_gradient_boosting._bitset import set_raw_bitset_from_binned_bitset", - "from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE" - ], - "classes": [], - "functions": [ - { - "name": "test_set_get_bitset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raw_bitset_from_binned_bitset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_compare_lightgbm", - "imports": [ - "from sklearn.model_selection import train_test_split", - "from sklearn.metrics import accuracy_score", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "import numpy as np", - "import pytest", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from sklearn.ensemble import HistGradientBoostingRegressor", - "from sklearn.ensemble import HistGradientBoostingClassifier", - "from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper", - "from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator" - ], - "classes": [], - "functions": [ - { - "name": 
"test_same_predictions_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_same_predictions_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_same_predictions_multiclass_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_gradient_boosting", - "imports": [ - "import numpy as np", - "import pytest", - "from numpy.testing import assert_allclose", - "from numpy.testing import assert_array_equal", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.datasets import make_low_rank_matrix", - "from sklearn.preprocessing import KBinsDiscretizer", - "from sklearn.preprocessing import MinMaxScaler", - "from sklearn.preprocessing import OneHotEncoder", - "from sklearn.model_selection import train_test_split", - "from sklearn.model_selection import cross_val_score", - "from sklearn.base import clone", - "from sklearn.base import BaseEstimator", - "from sklearn.base import TransformerMixin", - "from sklearn.base import is_regressor", - "from sklearn.pipeline import make_pipeline", - "from sklearn.metrics import mean_poisson_deviance", - "from sklearn.dummy import DummyRegressor", - "from sklearn.exceptions import NotFittedError", - "from sklearn.compose import make_column_transformer", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from sklearn.ensemble import HistGradientBoostingRegressor", - "from sklearn.ensemble import HistGradientBoostingClassifier", - "from sklearn.ensemble._hist_gradient_boosting.loss import _LOSSES", - "from sklearn.ensemble._hist_gradient_boosting.loss import LeastSquares", - "from 
sklearn.ensemble._hist_gradient_boosting.loss import BinaryCrossEntropy", - "from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower", - "from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper", - "from sklearn.utils import shuffle" - ], - "classes": [], - "functions": [ - { - "name": "_make_dumb_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make a dumb dataset to test early stopping." - }, - { - "name": "test_init_parameters_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_classification_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping_default", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_should_stop", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_least_absolute_deviation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_poisson_y_positive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_poisson", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_binning_train_validation_are_separated", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_values_trivial", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_values_resilience", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_division_hessians", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_small_trainset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_values_minmax_imputation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_infinite_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_consistent_lengths", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_infinite_values_missing_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_crossentropy_binary_problem", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_string_target_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_sample_weights_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_sample_weights_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight_effect", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sum_hessians_are_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_depth_max_leaf_nodes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping_on_test_set_with_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_single_node_trees", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_custom_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_staged_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unknown_categories_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categorical_encoding_strategies", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categorical_spec_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categorical_spec_no_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categorical_bad_encoding_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_uint8_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_grower", - "imports": [ - "import numpy as np", - "import pytest", - "from pytest import approx", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_allclose", - "from sklearn.preprocessing import OneHotEncoder", - "from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower", - "from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper", - "from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import Y_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import X_BITSET_INNER_DTYPE" - ], - "classes": [], - "functions": [ - { - "name": "_make_training_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_children_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grow_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predictor_from_grower", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_samples_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_samples_leaf_root", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_is_stump", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_parameters_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_value_predict_only", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_split_on_nan_with_infinite_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grow_tree_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ohe_equivalence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_histogram", - "imports": [ - "import numpy as np", - "import pytest", - "from numpy.testing import assert_allclose", - "from numpy.testing import assert_array_equal", - "from sklearn.ensemble._hist_gradient_boosting.histogram import _build_histogram_naive", - "from sklearn.ensemble._hist_gradient_boosting.histogram import _build_histogram", - "from sklearn.ensemble._hist_gradient_boosting.histogram import _build_histogram_no_hessian", - "from sklearn.ensemble._hist_gradient_boosting.histogram import _build_histogram_root_no_hessian", - "from sklearn.ensemble._hist_gradient_boosting.histogram import _build_histogram_root", - "from sklearn.ensemble._hist_gradient_boosting.histogram import 
_subtract_histograms", - "from sklearn.ensemble._hist_gradient_boosting.common import HISTOGRAM_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE" - ], - "classes": [], - "functions": [ - { - "name": "test_build_histogram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_histogram_sample_order_independence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unrolled_equivalent_to_naive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hist_subtraction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_loss", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_almost_equal", - "from numpy.testing import assert_allclose", - "from scipy.optimize import newton", - "from scipy.special import logit", - "from sklearn.utils import assert_all_finite", - "from sklearn.utils.fixes import sp_version", - "from sklearn.utils.fixes import parse_version", - "import pytest", - "from sklearn.ensemble._hist_gradient_boosting.loss import _LOSSES", - "from sklearn.ensemble._hist_gradient_boosting.common import Y_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE", - "from sklearn.utils._testing import skip_if_32bit" - ], - "classes": [], - "functions": [ - { - "name": "get_derivatives_helper", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return get_gradients() and get_hessians() functions for a given loss.\n " - }, - { - "name": "test_derivatives", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_numerical_gradients", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_baseline_least_squares", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_baseline_least_absolute_deviation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_baseline_poisson", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_baseline_binary_crossentropy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_baseline_categorical_crossentropy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight_multiplies_gradients", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_gradient_and_hessians_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_monotonic_contraints", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower", - "from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import MonotonicConstraint", - "from sklearn.ensemble._hist_gradient_boosting.splitting import Splitter", - "from 
sklearn.ensemble._hist_gradient_boosting.splitting import compute_node_value", - "from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from sklearn.ensemble import HistGradientBoostingRegressor", - "from sklearn.ensemble import HistGradientBoostingClassifier" - ], - "classes": [], - "functions": [ - { - "name": "is_increasing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "is_decreasing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_leaves_values_monotonic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_children_values_monotonic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_children_values_bounded", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nodes_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bounded_value_min_gain_to_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_predictor", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_allclose", - "from sklearn.datasets import 
make_regression", - "from sklearn.model_selection import train_test_split", - "from sklearn.metrics import r2_score", - "import pytest", - "from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper", - "from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower", - "from sklearn.ensemble._hist_gradient_boosting.predictor import TreePredictor", - "from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import PREDICTOR_RECORD_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import ALMOST_INF", - "from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import X_BITSET_INNER_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting._bitset import set_bitset_memoryview", - "from sklearn.ensemble._hist_gradient_boosting._bitset import set_raw_bitset_from_binned_bitset" - ], - "classes": [], - "functions": [ - { - "name": "test_regression_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_infinite_values_and_thresholds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categorical_predictor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_splitting", - "imports": [ - "import numpy as np", - "import pytest", - "from numpy.testing import assert_array_equal", - "from sklearn.ensemble._hist_gradient_boosting.common import HISTOGRAM_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE", - "from 
sklearn.ensemble._hist_gradient_boosting.common import MonotonicConstraint", - "from sklearn.ensemble._hist_gradient_boosting.splitting import Splitter", - "from sklearn.ensemble._hist_gradient_boosting.splitting import compute_node_value", - "from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder", - "from sklearn.utils._testing import skip_if_32bit" - ], - "classes": [], - "functions": [ - { - "name": "test_histogram_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_and_hessian_sanity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_split_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_gain_to_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_splitting_missing_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_splitting_categorical_cat_smooth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_assert_categories_equals_bitset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_splitting_categorical_sanity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_warm_start", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_allclose", - "import pytest", - "from sklearn.base import clone", - "from 
sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from sklearn.ensemble import HistGradientBoostingRegressor", - "from sklearn.ensemble import HistGradientBoostingClassifier", - "from sklearn.metrics import check_scoring" - ], - "classes": [], - "functions": [ - { - "name": "_assert_predictor_equal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Assert that two HistGBM instances are identical." - }, - { - "name": "test_max_iter_with_warm_start_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_yields_identical_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_equal_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_clear", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_seeds_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.experimental.enable_halving_search_cv", - "imports": [ - "from model_selection._search_successive_halving import 
HalvingRandomSearchCV", - "from model_selection._search_successive_halving import HalvingGridSearchCV", - "from None import model_selection" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.experimental.enable_hist_gradient_boosting", - "imports": [ - "from ensemble._hist_gradient_boosting.gradient_boosting import HistGradientBoostingClassifier", - "from ensemble._hist_gradient_boosting.gradient_boosting import HistGradientBoostingRegressor", - "from None import ensemble" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.experimental.enable_iterative_imputer", - "imports": [ - "from impute._iterative import IterativeImputer", - "from None import impute" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.experimental", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.experimental.tests.test_enable_hist_gradient_boosting", - "imports": [ - "import textwrap", - "from sklearn.utils._testing import assert_run_python_script" - ], - "classes": [], - "functions": [ - { - "name": "test_imports_strategies", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.experimental.tests.test_enable_iterative_imputer", - "imports": [ - "import textwrap", - "from sklearn.utils._testing import assert_run_python_script" - ], - "classes": [], - "functions": [ - { - "name": "test_imports_strategies", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.experimental.tests.test_enable_successive_halving", - "imports": [ - "import textwrap", - "from sklearn.utils._testing import assert_run_python_script" - ], - "classes": [], - "functions": [ - { - "name": "test_imports_strategies", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": 
"sklearn.experimental.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.externals.conftest", - "imports": [], - "classes": [], - "functions": [ - { - "name": "pytest_ignore_collect", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.externals._arff", - "imports": [ - "from typing import TYPE_CHECKING", - "from typing import Optional", - "from typing import List", - "from typing import Dict", - "from typing import Any", - "from typing import Iterator", - "from typing import Union", - "from typing import Tuple", - "import re", - "import sys", - "import csv", - "from typing_extensions import TypedDict", - "from itertools import izip as zip" - ], - "classes": [ - { - "name": "ArffContainerType", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - }, - { - "name": "ArffException", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__str__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "BadRelationFormat", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Error raised when the relation declaration is in an invalid format." 
- }, - { - "name": "BadAttributeFormat", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Error raised when some attribute declaration is in an invalid format." - }, - { - "name": "BadDataFormat", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Error raised when some data instance is in an invalid format." - }, - { - "name": "BadAttributeType", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Error raised when some invalid type is provided into the attribute\ndeclaration." - }, - { - "name": "BadAttributeName", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Error raised when an attribute name is provided twice the attribute\ndeclaration." - }, - { - "name": "BadNominalValue", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Error raised when a value in used in some data instance but is not\ndeclared into it respective attribute declaration." 
- }, - { - "name": "BadNominalFormatting", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Error raised when a nominal value with space is not properly quoted." - }, - { - "name": "BadNumericalValue", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Error raised when and invalid numerical value is used in some data\ninstance." - }, - { - "name": "BadStringValue", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Error raise when a string contains space but is not quoted." - }, - { - "name": "BadLayout", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Error raised when the layout of the ARFF file has something wrong." - }, - { - "name": "BadObject", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__str__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Error raised when the object representing the ARFF file has something\nwrong." 
- }, - { - "name": "EncodedNominalConversor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NominalConversor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "DenseGeneratorData", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decode_rows", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_decode_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "encode_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "(INTERNAL) Encodes a line of data.\n\nData instances follow the csv format, i.e, attribute values are\ndelimited by commas. After converted from csv.\n\n:param data: a list of values.\n:param attributes: a list of attributes. Used to check if data is valid.\n:return: a string with the encoded data line." - } - ], - "docstring": "Internal helper class to allow for different matrix types without\nmaking the code a huge collection of if statements." 
- }, - { - "name": "_DataListMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decode_rows", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mixin to return a list from decode_rows instead of a generator" - }, - { - "name": "Data", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - }, - { - "name": "COOData", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decode_rows", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "encode_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "LODGeneratorData", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decode_rows", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "encode_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "LODData", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": "" - } - ], - "docstring": null - }, - { - "name": "ArffDecoder", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Constructor." - }, - { - "name": "_decode_comment", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "(INTERNAL) Decodes a comment line.\n\n Comments are single line strings starting, obligatorily, with the ``%``\n character, and can have any symbol, including whitespaces or special\n characters.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\r\n\" characters.\n\n :param s: a normalized string.\n :return: a string with the decoded comment.\n " - }, - { - "name": "_decode_relation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "(INTERNAL) Decodes a relation line.\n\n The relation declaration is a line with the format ``@RELATION\n ``, where ``relation-name`` is a string. The string must\n start with alphabetic character and must be quoted if the name includes\n spaces, otherwise this method will raise a `BadRelationFormat` exception.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\r\n\" characters.\n\n :param s: a normalized string.\n :return: a string with the decoded relation name.\n " - }, - { - "name": "_decode_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "(INTERNAL) Decodes an attribute line.\n\n The attribute is the most complex declaration in an arff file. 
All\n attributes must follow the template::\n\n @attribute \n\n where ``attribute-name`` is a string, quoted if the name contains any\n whitespace, and ``datatype`` can be:\n\n - Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n - Strings as ``STRING``.\n - Dates (NOT IMPLEMENTED).\n - Nominal attributes with format:\n\n {, , , ...}\n\n The nominal names follow the rules for the attribute names, i.e., they\n must be quoted if the name contains whitespaces.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\r\n\" characters.\n\n :param s: a normalized string.\n :return: a tuple (ATTRIBUTE_NAME, TYPE_OR_VALUES).\n " - }, - { - "name": "_decode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Do the job the ``encode``." - }, - { - "name": "decode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the Python representation of a given ARFF file.\n\nWhen a file object is passed as an argument, this method reads lines\niteratively, avoiding to load unnecessary information to the memory.\n\n:param s: a string or file object with the ARFF file.\n:param encode_nominal: boolean, if True perform a label encoding\n while reading the .arff file.\n:param return_type: determines the data structure used to store the\n dataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n `arff.DENSE_GEN` or `arff.LOD_GEN`.\n Consult the sections on `working with sparse data`_ and `loading\n progressively`_." - } - ], - "docstring": "An ARFF decoder." 
- }, - { - "name": "ArffEncoder", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_encode_comment", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "(INTERNAL) Encodes a comment line.\n\nComments are single line strings starting, obligatorily, with the ``%``\ncharacter, and can have any symbol, including whitespaces or special\ncharacters.\n\nIf ``s`` is None, this method will simply return an empty comment.\n\n:param s: (OPTIONAL) string.\n:return: a string with the encoded comment line." - }, - { - "name": "_encode_relation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "(INTERNAL) Decodes a relation line.\n\nThe relation declaration is a line with the format ``@RELATION\n``, where ``relation-name`` is a string.\n\n:param name: a string.\n:return: a string with the encoded relation declaration." - }, - { - "name": "_encode_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "(INTERNAL) Encodes an attribute line.\n\nThe attribute follow the template::\n\n @attribute \n\nwhere ``attribute-name`` is a string, and ``datatype`` can be:\n\n- Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n- Strings as ``STRING``.\n- Dates (NOT IMPLEMENTED).\n- Nominal attributes with format:\n\n {, , , ...}\n\nThis method must receive a the name of the attribute and its type, if\nthe attribute type is nominal, ``type`` must be a list of values.\n\n:param name: a string.\n:param type_: a string or a list of string.\n:return: a string with the encoded attribute declaration." 
- }, - { - "name": "encode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Encodes a given object to an ARFF file.\n\n:param obj: the object containing the ARFF information.\n:return: the ARFF file as an unicode string." - }, - { - "name": "iter_encode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The iterative version of `arff.ArffEncoder.encode`.\n\nThis encodes iteratively a given object and return, one-by-one, the\nlines of the ARFF file.\n\n:param obj: the object containing the ARFF information.\n:return: (yields) the ARFF file as unicode strings." - } - ], - "docstring": "An ARFF encoder." - } - ], - "functions": [ - { - "name": "_build_re_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_escape_sub_callback", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_unquote", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_parse_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "(INTERNAL) Split a line into a list of values" - }, - { - "name": "_unescape_sub_callback", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "encode_string", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_data_object_for_decoding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_data_object_for_encoding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - 
{ - "name": "load", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load a file-like object containing the ARFF document and convert it into\na Python object.\n\n:param fp: a file-like object.\n:param encode_nominal: boolean, if True perform a label encoding\n while reading the .arff file.\n:param return_type: determines the data structure used to store the\n dataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n `arff.DENSE_GEN` or `arff.LOD_GEN`.\n Consult the sections on `working with sparse data`_ and `loading\n progressively`_.\n:return: a dictionary.\n " - }, - { - "name": "loads", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convert a string instance containing the ARFF document into a Python\nobject.\n\n:param s: a string object.\n:param encode_nominal: boolean, if True perform a label encoding\n while reading the .arff file.\n:param return_type: determines the data structure used to store the\n dataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n `arff.DENSE_GEN` or `arff.LOD_GEN`.\n Consult the sections on `working with sparse data`_ and `loading\n progressively`_.\n:return: a dictionary." - }, - { - "name": "dump", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Serialize an object representing the ARFF document to a given file-like\nobject.\n\n:param obj: a dictionary.\n:param fp: a file-like object." - }, - { - "name": "dumps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Serialize an object representing the ARFF document, returning a string.\n\n:param obj: a dictionary.\n:return: a string with the ARFF document." 
- } - ] - }, - { - "name": "sklearn.externals._lobpcg", - "imports": [ - "from __future__ import division", - "from __future__ import print_function", - "from __future__ import absolute_import", - "import numpy as np", - "from scipy.linalg import inv", - "from scipy.linalg import eigh", - "from scipy.linalg import cho_factor", - "from scipy.linalg import cho_solve", - "from scipy.linalg import cholesky", - "from scipy.linalg import orth", - "from scipy.linalg import LinAlgError", - "from scipy.sparse.linalg import aslinearoperator", - "import warnings", - "from scipy.linalg import norm" - ], - "classes": [], - "functions": [ - { - "name": "bmat", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_save", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_report_nonhermitian", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Report if `M` is not a hermitian matrix given its type." - }, - { - "name": "_as2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "If the input array is 2D return it, if it is 1D, append a dimension,\nmaking it a column vector." - }, - { - "name": "_makeOperator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Takes a dense numpy array or a sparse matrix or\na function and makes an operator performing matrix * blockvector\nproducts." - }, - { - "name": "_applyConstraints", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Changes blockVectorV in place." - }, - { - "name": "_b_orthonormalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "B-orthonormalize the given block vector using Cholesky." 
- }, - { - "name": "_get_indx", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get `num` indices into `_lambda` depending on `largest` option." - }, - { - "name": "lobpcg", - "decorators": [], - "parameters": [ - { - "name": "A", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The symmetric linear operator of the problem, usually a sparse matrix. Often called the \"stiffness matrix\"." - }, - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial approximation to the ``k`` eigenvectors (non-sparse). If `A` has ``shape=(n,n)`` then `X` should have shape ``shape=(n,k)``." - }, - { - "name": "B", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The right hand side operator in a generalized eigenproblem. By default, ``B = Identity``. Often called the \"mass matrix\"." - }, - { - "name": "M", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Preconditioner to `A`; by default ``M = Identity``. `M` should approximate the inverse of `A`." - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "n-by-sizeY matrix of constraints (non-sparse), sizeY < n The iterations will be performed in the B-orthogonal complement of the column-space of Y. Y must be full rank." - }, - { - "name": "tol", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Solver tolerance (stopping criterion). The default is ``tol=n*sqrt(eps)``." 
- }, - { - "name": "maxiter", - "type": "Optional[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations. The default is ``maxiter=min(n, 20)``." - }, - { - "name": "largest", - "type": "Optional[bool]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "When True, solve for the largest eigenvalues, otherwise the smallest." - }, - { - "name": "verbosityLevel", - "type": "Optional[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls solver output. The default is ``verbosityLevel=0``." - }, - { - "name": "retLambdaHistory", - "type": "Optional[bool]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to return eigenvalue history. Default is False." - }, - { - "name": "retResidualNormsHistory", - "type": "Optional[bool]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to return history of residual norms. Default is False." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Locally Optimal Block Preconditioned Conjugate Gradient Method (LOBPCG)\n\nLOBPCG is a preconditioned eigensolver for large symmetric positive\ndefinite (SPD) generalized eigenproblems.\n\nParameters\n----------\nA : {sparse matrix, dense matrix, LinearOperator}\n The symmetric linear operator of the problem, usually a\n sparse matrix. Often called the \"stiffness matrix\".\nX : ndarray, float32 or float64\n Initial approximation to the ``k`` eigenvectors (non-sparse). If `A`\n has ``shape=(n,n)`` then `X` should have shape ``shape=(n,k)``.\nB : {dense matrix, sparse matrix, LinearOperator}, optional\n The right hand side operator in a generalized eigenproblem.\n By default, ``B = Identity``. 
Often called the \"mass matrix\".\nM : {dense matrix, sparse matrix, LinearOperator}, optional\n Preconditioner to `A`; by default ``M = Identity``.\n `M` should approximate the inverse of `A`.\nY : ndarray, float32 or float64, optional\n n-by-sizeY matrix of constraints (non-sparse), sizeY < n\n The iterations will be performed in the B-orthogonal complement\n of the column-space of Y. Y must be full rank.\ntol : scalar, optional\n Solver tolerance (stopping criterion).\n The default is ``tol=n*sqrt(eps)``.\nmaxiter : int, optional\n Maximum number of iterations. The default is ``maxiter=min(n, 20)``.\nlargest : bool, optional\n When True, solve for the largest eigenvalues, otherwise the smallest.\nverbosityLevel : int, optional\n Controls solver output. The default is ``verbosityLevel=0``.\nretLambdaHistory : bool, optional\n Whether to return eigenvalue history. Default is False.\nretResidualNormsHistory : bool, optional\n Whether to return history of residual norms. Default is False.\n\nReturns\n-------\nw : ndarray\n Array of ``k`` eigenvalues\nv : ndarray\n An array of ``k`` eigenvectors. `v` has the same shape as `X`.\nlambdas : list of ndarray, optional\n The eigenvalue history, if `retLambdaHistory` is True.\nrnorms : list of ndarray, optional\n The history of residual norms, if `retResidualNormsHistory` is True.\n\nNotes\n-----\nIf both ``retLambdaHistory`` and ``retResidualNormsHistory`` are True,\nthe return tuple has the following format\n``(lambda, V, lambda history, residual norms history)``.\n\nIn the following ``n`` denotes the matrix size and ``m`` the number\nof required eigenvalues (smallest or largest).\n\nThe LOBPCG code internally solves eigenproblems of the size ``3m`` on every\niteration by calling the \"standard\" dense eigensolver, so if ``m`` is not\nsmall enough compared to ``n``, it does not make sense to call the LOBPCG\ncode, but rather one should use the \"standard\" eigensolver, e.g. 
numpy or\nscipy function in this case.\nIf one calls the LOBPCG algorithm for ``5m > n``, it will most likely break\ninternally, so the code tries to call the standard function instead.\n\nIt is not that ``n`` should be large for the LOBPCG to work, but rather the\nratio ``n / m`` should be large. It you call LOBPCG with ``m=1``\nand ``n=10``, it works though ``n`` is small. The method is intended\nfor extremely large ``n / m``, see e.g., reference [28] in\nhttps://arxiv.org/abs/0705.2626\n\nThe convergence speed depends basically on two factors:\n\n1. How well relatively separated the seeking eigenvalues are from the rest\n of the eigenvalues. One can try to vary ``m`` to make this better.\n\n2. How well conditioned the problem is. This can be changed by using proper\n preconditioning. For example, a rod vibration test problem (under tests\n directory) is ill-conditioned for large ``n``, so convergence will be\n slow, unless efficient preconditioning is used. For this specific\n problem, a good simple preconditioner function would be a linear solve\n for `A`, which is easy to code since A is tridiagonal.\n\nReferences\n----------\n.. [1] A. V. Knyazev (2001),\n Toward the Optimal Preconditioned Eigensolver: Locally Optimal\n Block Preconditioned Conjugate Gradient Method.\n SIAM Journal on Scientific Computing 23, no. 2,\n pp. 517-541. http://dx.doi.org/10.1137/S1064827500366124\n\n.. [2] A. V. Knyazev, I. Lashuk, M. E. Argentati, and E. Ovchinnikov\n (2007), Block Locally Optimal Preconditioned Eigenvalue Xolvers\n (BLOPEX) in hypre and PETSc. https://arxiv.org/abs/0705.2626\n\n.. [3] A. V. 
Knyazev's C and MATLAB implementations:\n https://bitbucket.org/joseroman/blopex\n\nExamples\n--------\n\nSolve ``A x = lambda x`` with constraints and preconditioning.\n\n>>> import numpy as np\n>>> from scipy.sparse import spdiags, issparse\n>>> from scipy.sparse.linalg import lobpcg, LinearOperator\n>>> n = 100\n>>> vals = np.arange(1, n + 1)\n>>> A = spdiags(vals, 0, n, n)\n>>> A.toarray()\narray([[ 1., 0., 0., ..., 0., 0., 0.],\n [ 0., 2., 0., ..., 0., 0., 0.],\n [ 0., 0., 3., ..., 0., 0., 0.],\n ...,\n [ 0., 0., 0., ..., 98., 0., 0.],\n [ 0., 0., 0., ..., 0., 99., 0.],\n [ 0., 0., 0., ..., 0., 0., 100.]])\n\nConstraints:\n\n>>> Y = np.eye(n, 3)\n\nInitial guess for eigenvectors, should have linearly independent\ncolumns. Column dimension = number of requested eigenvalues.\n\n>>> X = np.random.rand(n, 3)\n\nPreconditioner in the inverse of A in this example:\n\n>>> invA = spdiags([1./vals], 0, n, n)\n\nThe preconditiner must be defined by a function:\n\n>>> def precond( x ):\n... return invA @ x\n\nThe argument x of the preconditioner function is a matrix inside `lobpcg`,\nthus the use of matrix-matrix product ``@``.\n\nThe preconditioner function is passed to lobpcg as a `LinearOperator`:\n\n>>> M = LinearOperator(matvec=precond, matmat=precond,\n... shape=(n, n), dtype=float)\n\nLet us now solve the eigenvalue problem for the matrix A:\n\n>>> eigenvalues, _ = lobpcg(A, X, Y=Y, M=M, largest=False)\n>>> eigenvalues\narray([4., 5., 6.])\n\nNote that the vectors passed in Y are the eigenvectors of the 3 smallest\neigenvalues. The results returned are orthogonal to those." 
- } - ] - }, - { - "name": "sklearn.externals._pep562", - "imports": [ - "from __future__ import unicode_literals", - "import sys" - ], - "classes": [ - { - "name": "Pep562", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Acquire `__getattr__` and `__dir__`, but only replace module for versions less than Python 3.7." - }, - { - "name": "__dir__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the overridden `dir` if one was provided, else apply `dir` to the module." - }, - { - "name": "__getattr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Attempt to retrieve the attribute from the module, and if missing, use the overridden function if present." - } - ], - "docstring": "Backport of PEP 562 .\n\nWraps the module in a class that exposes the mechanics to override `__dir__` and `__getattr__`.\nThe given module will be searched for overrides of `__dir__` and `__getattr__` and use them when needed." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.externals._pilutil", - "imports": [ - "from __future__ import division", - "from __future__ import print_function", - "from __future__ import absolute_import", - "import numpy", - "from numpy import amin", - "from numpy import amax", - "from numpy import ravel", - "from numpy import asarray", - "from numpy import arange", - "from numpy import ones", - "from numpy import newaxis", - "from numpy import transpose", - "from numpy import iscomplexobj", - "from numpy import uint8", - "from numpy import issubdtype", - "from numpy import array", - "from PIL import Image", - "import Image" - ], - "classes": [], - "functions": [ - { - "name": "bytescale", - "decorators": [], - "parameters": [ - { - "name": "data", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "PIL image data array." - }, - { - "name": "cmin", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Bias scaling of small values. Default is ``data.min()``." - }, - { - "name": "cmax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Bias scaling of large values. Default is ``data.max()``." - }, - { - "name": "high", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Scale max value to `high`. Default is 255." - }, - { - "name": "low", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Scale min value to `low`. Default is 0." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Byte scales an array (image).\n\nByte scaling means converting the input image to uint8 dtype and scaling\nthe range to ``(low, high)`` (default 0-255).\nIf the input image already has dtype uint8, no scaling is done.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\nParameters\n----------\ndata : ndarray\n PIL image data array.\ncmin : scalar, default=None\n Bias scaling of small values. Default is ``data.min()``.\ncmax : scalar, default=None\n Bias scaling of large values. Default is ``data.max()``.\nhigh : scalar, default=None\n Scale max value to `high`. Default is 255.\nlow : scalar, default=None\n Scale min value to `low`. Default is 0.\n\nReturns\n-------\nimg_array : uint8 ndarray\n The byte-scaled array.\n\nExamples\n--------\n>>> import numpy as np\n>>> from scipy.misc import bytescale\n>>> img = np.array([[ 91.06794177, 3.39058326, 84.4221549 ],\n... [ 73.88003259, 80.91433048, 4.88878881],\n... [ 51.53875334, 34.45808177, 27.5873488 ]])\n>>> bytescale(img)\narray([[255, 0, 236],\n [205, 225, 4],\n [140, 90, 70]], dtype=uint8)\n>>> bytescale(img, high=200, low=100)\narray([[200, 100, 192],\n [180, 188, 102],\n [155, 135, 128]], dtype=uint8)\n>>> bytescale(img, cmin=0, cmax=255)\narray([[91, 3, 84],\n [74, 81, 5],\n [52, 34, 28]], dtype=uint8)" - }, - { - "name": "imread", - "decorators": [], - "parameters": [ - { - "name": "name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The file name or file object to be read." - }, - { - "name": "flatten", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, flattens the color layers into a single gray-scale layer." 
- }, - { - "name": "mode", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Mode to convert image to, e.g. ``'RGB'``. See the Notes for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Read an image from a file as an array.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\nParameters\n----------\nname : str or file object\n The file name or file object to be read.\nflatten : bool, default=False\n If True, flattens the color layers into a single gray-scale layer.\nmode : str, default=None\n Mode to convert image to, e.g. ``'RGB'``. See the Notes for more\n details.\n\nReturns\n-------\nimread : ndarray\n The array obtained by reading the image.\n\nNotes\n-----\n`imread` uses the Python Imaging Library (PIL) to read an image.\nThe following notes are from the PIL documentation.\n\n`mode` can be one of the following strings:\n\n* 'L' (8-bit pixels, black and white)\n* 'P' (8-bit pixels, mapped to any other mode using a color palette)\n* 'RGB' (3x8-bit pixels, true color)\n* 'RGBA' (4x8-bit pixels, true color with transparency mask)\n* 'CMYK' (4x8-bit pixels, color separation)\n* 'YCbCr' (3x8-bit pixels, color video format)\n* 'I' (32-bit signed integer pixels)\n* 'F' (32-bit floating point pixels)\n\nPIL also provides limited support for a few special modes, including\n'LA' ('L' with alpha), 'RGBX' (true color with padding) and 'RGBa'\n(true color with premultiplied alpha).\n\nWhen translating a color image to black and white (mode 'L', 'I' or\n'F'), the library uses the ITU-R 601-2 luma transform::\n\n L = R * 299/1000 + G * 587/1000 + B * 114/1000\n\nWhen `flatten` is True, the image is converted using mode 'F'.\nWhen `mode` is not None and `flatten` is True, the image is first\nconverted according to `mode`, and the result is then flattened using\nmode 'F'." 
- }, - { - "name": "imsave", - "decorators": [], - "parameters": [ - { - "name": "name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Output file name or file object." - }, - { - "name": "arr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array containing image values. If the shape is ``MxN``, the array represents a grey-level image. Shape ``MxNx3`` stores the red, green and blue bands along the last dimension. An alpha layer may be included, specified as the last colour band of an ``MxNx4`` array." - }, - { - "name": "format", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Image format. If omitted, the format to use is determined from the file name extension. If a file object was used instead of a file name, this parameter should always be used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Save an array as an image.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\n.. warning::\n\n This function uses `bytescale` under the hood to rescale images to use\n the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.\n It will also cast data for 2-D images to ``uint32`` for ``mode=None``\n (which is the default).\n\nParameters\n----------\nname : str or file object\n Output file name or file object.\narr : ndarray, MxN or MxNx3 or MxNx4\n Array containing image values. If the shape is ``MxN``, the array\n represents a grey-level image. Shape ``MxNx3`` stores the red, green\n and blue bands along the last dimension. An alpha layer may be\n included, specified as the last colour band of an ``MxNx4`` array.\nformat : str, default=None\n Image format. If omitted, the format to use is determined from the\n file name extension. 
If a file object was used instead of a file name,\n this parameter should always be used.\n\nExamples\n--------\nConstruct an array of gradient intensity values and save to file:\n\n>>> import numpy as np\n>>> from scipy.misc import imsave\n>>> x = np.zeros((255, 255))\n>>> x = np.zeros((255, 255), dtype=np.uint8)\n>>> x[:] = np.arange(255)\n>>> imsave('gradient.png', x)\n\nConstruct an array with three colour bands (R, G, B) and store to file:\n\n>>> rgb = np.zeros((255, 255, 3), dtype=np.uint8)\n>>> rgb[..., 0] = np.arange(255)\n>>> rgb[..., 1] = 55\n>>> rgb[..., 2] = 1 - np.arange(255)\n>>> imsave('rgb_gradient.png', rgb)" - }, - { - "name": "fromimage", - "decorators": [], - "parameters": [ - { - "name": "im", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input image." - }, - { - "name": "flatten", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, convert the output to grey-scale." - }, - { - "name": "mode", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Mode to convert image to, e.g. ``'RGB'``. See the Notes of the `imread` docstring for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return a copy of a PIL image as a numpy array.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\nParameters\n----------\nim : PIL image\n Input image.\nflatten : bool, default=False\n If true, convert the output to grey-scale.\nmode : str, default=None\n Mode to convert image to, e.g. ``'RGB'``. See the Notes of the\n `imread` docstring for more details.\n\nReturns\n-------\nfromimage : ndarray\n The different colour bands/channels are stored in the\n third dimension, such that a grey-image is MxN, an\n RGB-image MxNx3 and an RGBA-image MxNx4." 
- }, - { - "name": "toimage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Takes a numpy array and returns a PIL image.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\nThe mode of the PIL image depends on the array shape and the `pal` and\n`mode` keywords.\n\nFor 2-D arrays, if `pal` is a valid (N,3) byte-array giving the RGB values\n(from 0 to 255) then ``mode='P'``, otherwise ``mode='L'``, unless mode\nis given as 'F' or 'I' in which case a float and/or integer array is made.\n\n.. warning::\n\n This function uses `bytescale` under the hood to rescale images to use\n the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.\n It will also cast data for 2-D images to ``uint32`` for ``mode=None``\n (which is the default).\n\nNotes\n-----\nFor 3-D arrays, the `channel_axis` argument tells which dimension of the\narray holds the channel data.\n\nFor 3-D arrays if one of the dimensions is 3, the mode is 'RGB'\nby default or 'YCbCr' if selected.\n\nThe numpy array must be either 2 dimensional or 3 dimensional." - }, - { - "name": "imresize", - "decorators": [], - "parameters": [ - { - "name": "arr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The array of image to be resized." - }, - { - "name": "size", - "type": "Union[float, int, Tuple[]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "* int - Percentage of current size. * float - Fraction of current size. * tuple - Size of the output image (height, width)." - }, - { - "name": "interp", - "type": "str", - "hasDefault": true, - "default": "'bilinear'", - "limitation": null, - "ignored": false, - "docstring": "Interpolation to use for re-sizing ('nearest', 'lanczos', 'bilinear', 'bicubic' or 'cubic')." 
- }, - { - "name": "mode", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The PIL image mode ('P', 'L', etc.) to convert `arr` before resizing. If ``mode=None`` (the default), 2-D images will be treated like ``mode='L'``, i.e. casting to long integer. For 3-D and 4-D arrays, `mode` will be set to ``'RGB'`` and ``'RGBA'`` respectively." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Resize an image.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\n.. warning::\n\n This function uses `bytescale` under the hood to rescale images to use\n the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.\n It will also cast data for 2-D images to ``uint32`` for ``mode=None``\n (which is the default).\n\nParameters\n----------\narr : ndarray\n The array of image to be resized.\nsize : int, float or tuple\n * int - Percentage of current size.\n * float - Fraction of current size.\n * tuple - Size of the output image (height, width).\n\ninterp : str, default='bilinear'\n Interpolation to use for re-sizing ('nearest', 'lanczos', 'bilinear',\n 'bicubic' or 'cubic').\nmode : str, default=None\n The PIL image mode ('P', 'L', etc.) to convert `arr` before resizing.\n If ``mode=None`` (the default), 2-D images will be treated like\n ``mode='L'``, i.e. casting to long integer. For 3-D and 4-D arrays,\n `mode` will be set to ``'RGB'`` and ``'RGBA'`` respectively.\n\nReturns\n-------\nimresize : ndarray\n The resized array of image.\n\nSee Also\n--------\ntoimage : Implicitly used to convert `arr` according to `mode`.\nscipy.ndimage.zoom : More generic implementation that does not use PIL." 
- } - ] - }, - { - "name": "sklearn.externals", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.feature_extraction.image", - "imports": [ - "from itertools import product", - "import numbers", - "import numpy as np", - "from scipy import sparse", - "from numpy.lib.stride_tricks import as_strided", - "from utils import check_array", - "from utils import check_random_state", - "from utils.validation import _deprecate_positional_args", - "from base import BaseEstimator" - ], - "classes": [ - { - "name": "PatchExtractor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "patch_size", - "type": "Tuple[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dimensions of one patch." - }, - { - "name": "max_patches", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of patches per image to extract. If max_patches is a float in (0, 1), it is taken to mean a proportion of the total number of patches." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator used for random sampling when `max_patches` is not None. Use an int to make the randomness deterministic. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of images from which to extract patches. For color images, the last dimension specifies the channel: a RGB image would have `n_channels=3`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transforms the image samples in X into a matrix of patch data.\n\nParameters\n----------\nX : ndarray of shape (n_samples, image_height, image_width) or (n_samples, image_height, image_width, n_channels)\n Array of images from which to extract patches. For color images,\n the last dimension specifies the channel: a RGB image would have\n `n_channels=3`.\n\nReturns\n-------\npatches : array of shape (n_patches, patch_height, patch_width) or (n_patches, patch_height, patch_width, n_channels)\n The collection of patches extracted from the images, where\n `n_patches` is either `n_samples * max_patches` or the total\n number of patches that can be extracted." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Extracts patches from a collection of images\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.9\n\nParameters\n----------\npatch_size : tuple of int (patch_height, patch_width), default=None\n The dimensions of one patch.\n\nmax_patches : int or float, default=None\n The maximum number of patches per image to extract. 
If max_patches is a\n float in (0, 1), it is taken to mean a proportion of the total number\n of patches.\n\nrandom_state : int, RandomState instance, default=None\n Determines the random number generator used for random sampling when\n `max_patches` is not None. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> from sklearn.datasets import load_sample_images\n>>> from sklearn.feature_extraction import image\n>>> # Use the array data from the second image in this dataset:\n>>> X = load_sample_images().images[1]\n>>> print('Image shape: {}'.format(X.shape))\nImage shape: (427, 640, 3)\n>>> pe = image.PatchExtractor(patch_size=(2, 2))\n>>> pe_fit = pe.fit(X)\n>>> pe_trans = pe.transform(X)\n>>> print('Patches shape: {}'.format(pe_trans.shape))\nPatches shape: (545706, 2, 2)" - } - ], - "functions": [ - { - "name": "_make_edges_3d", - "decorators": [], - "parameters": [ - { - "name": "n_x", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The size of the grid in the x direction." - }, - { - "name": "n_y", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The size of the grid in the y direction." 
- }, - { - "name": "n_z", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The size of the grid in the z direction, defaults to 1" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns a list of edges for a 3D image.\n\nParameters\n----------\nn_x : int\n The size of the grid in the x direction.\nn_y : int\n The size of the grid in the y direction.\nn_z : integer, default=1\n The size of the grid in the z direction, defaults to 1" - }, - { - "name": "_compute_gradient_3d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_mask_edges_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply a mask to edges (weighted or not)" - }, - { - "name": "_to_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Auxiliary function for img_to_graph and grid_to_graph\n " - }, - { - "name": "img_to_graph", - "decorators": [], - "parameters": [ - { - "name": "img", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "2D or 3D image." - }, - { - "name": "mask", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An optional mask of the image, to consider only part of the pixels." - }, - { - "name": "return_as", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class to use to build the returned adjacency matrix." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data of the returned sparse matrix. 
By default it is the dtype of img" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Graph of the pixel-to-pixel gradient connections\n\nEdges are weighted with the gradient values.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nimg : ndarray of shape (height, width) or (height, width, channel)\n 2D or 3D image.\nmask : ndarray of shape (height, width) or (height, width, channel), dtype=bool, default=None\n An optional mask of the image, to consider only part of the\n pixels.\nreturn_as : np.ndarray or a sparse matrix class, default=sparse.coo_matrix\n The class to use to build the returned adjacency matrix.\ndtype : dtype, default=None\n The data of the returned sparse matrix. By default it is the\n dtype of img\n\nNotes\n-----\nFor scikit-learn versions 0.14.1 and prior, return_as=np.ndarray was\nhandled by returning a dense np.matrix instance. Going forward, np.ndarray\nreturns an np.ndarray, as expected.\n\nFor compatibility, user code relying on this method should wrap its\ncalls in ``np.asarray`` to avoid type issues." - }, - { - "name": "grid_to_graph", - "decorators": [], - "parameters": [ - { - "name": "n_x", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimension in x axis" - }, - { - "name": "n_y", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimension in y axis" - }, - { - "name": "n_z", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Dimension in z axis" - }, - { - "name": "mask", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An optional mask of the image, to consider only part of the pixels." 
- }, - { - "name": "return_as", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class to use to build the returned adjacency matrix." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "int", - "limitation": null, - "ignored": false, - "docstring": "The data of the returned sparse matrix. By default it is int" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Graph of the pixel-to-pixel connections\n\nEdges exist if 2 voxels are connected.\n\nParameters\n----------\nn_x : int\n Dimension in x axis\nn_y : int\n Dimension in y axis\nn_z : int, default=1\n Dimension in z axis\nmask : ndarray of shape (n_x, n_y, n_z), dtype=bool, default=None\n An optional mask of the image, to consider only part of the\n pixels.\nreturn_as : np.ndarray or a sparse matrix class, default=sparse.coo_matrix\n The class to use to build the returned adjacency matrix.\ndtype : dtype, default=int\n The data of the returned sparse matrix. By default it is int\n\nNotes\n-----\nFor scikit-learn versions 0.14.1 and prior, return_as=np.ndarray was\nhandled by returning a dense np.matrix instance. Going forward, np.ndarray\nreturns an np.ndarray, as expected.\n\nFor compatibility, user code relying on this method should wrap its\ncalls in ``np.asarray`` to avoid type issues." 
- }, - { - "name": "_compute_n_patches", - "decorators": [], - "parameters": [ - { - "name": "i_h", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The image height" - }, - { - "name": "i_w", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The image with" - }, - { - "name": "p_h", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The height of a patch" - }, - { - "name": "p_w", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The width of a patch" - }, - { - "name": "max_patches", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of patches to extract. If max_patches is a float between 0 and 1, it is taken to be a proportion of the total number of patches." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the number of patches that will be extracted in an image.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ni_h : int\n The image height\ni_w : int\n The image with\np_h : int\n The height of a patch\np_w : int\n The width of a patch\nmax_patches : int or float, default=None\n The maximum number of patches to extract. If max_patches is a float\n between 0 and 1, it is taken to be a proportion of the total number\n of patches." 
- }, - { - "name": "_extract_patches", - "decorators": [], - "parameters": [ - { - "name": "arr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "n-dimensional array of which patches are to be extracted" - }, - { - "name": "patch_shape", - "type": "Union[Tuple[], int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates the shape of the patches to be extracted. If an integer is given, the shape will be a hypercube of sidelength given by its value." - }, - { - "name": "extraction_step", - "type": "Union[Tuple[], int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Indicates step size at which extraction shall be performed. If integer is given, then the step is uniform in all dimensions." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Extracts patches of any n-dimensional array in place using strides.\n\nGiven an n-dimensional array it will return a 2n-dimensional array with\nthe first n dimensions indexing patch position and the last n indexing\nthe patch content. This operation is immediate (O(1)). A reshape\nperformed on the first n dimensions will cause numpy to copy data, leading\nto a list of extracted patches.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\narr : ndarray\n n-dimensional array of which patches are to be extracted\n\npatch_shape : int or tuple of length arr.ndim.default=8\n Indicates the shape of the patches to be extracted. 
If an\n integer is given, the shape will be a hypercube of\n sidelength given by its value.\n\nextraction_step : int or tuple of length arr.ndim, default=1\n Indicates step size at which extraction shall be performed.\n If integer is given, then the step is uniform in all dimensions.\n\n\nReturns\n-------\npatches : strided ndarray\n 2n-dimensional array indexing patches on first n dimensions and\n containing patches on the last n dimensions. These dimensions\n are fake, but this way no data is copied. A simple reshape invokes\n a copying operation to obtain a list of patches:\n result.reshape([-1] + list(patch_shape))" - }, - { - "name": "extract_patches_2d", - "decorators": [], - "parameters": [ - { - "name": "image", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The original image data. For color images, the last dimension specifies the channel: a RGB image would have `n_channels=3`." - }, - { - "name": "patch_size", - "type": "Tuple[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dimensions of one patch." - }, - { - "name": "max_patches", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of patches to extract. If `max_patches` is a float between 0 and 1, it is taken to be a proportion of the total number of patches." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator used for random sampling when `max_patches` is not None. Use an int to make the randomness deterministic. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reshape a 2D image into a collection of patches\n\nThe resulting patches are allocated in a dedicated array.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nimage : ndarray of shape (image_height, image_width) or (image_height, image_width, n_channels)\n The original image data. For color images, the last dimension specifies\n the channel: a RGB image would have `n_channels=3`.\n\npatch_size : tuple of int (patch_height, patch_width)\n The dimensions of one patch.\n\nmax_patches : int or float, default=None\n The maximum number of patches to extract. If `max_patches` is a float\n between 0 and 1, it is taken to be a proportion of the total number\n of patches.\n\nrandom_state : int, RandomState instance, default=None\n Determines the random number generator used for random sampling when\n `max_patches` is not None. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\nReturns\n-------\npatches : array of shape (n_patches, patch_height, patch_width) or (n_patches, patch_height, patch_width, n_channels)\n The collection of patches extracted from the image, where `n_patches`\n is either `max_patches` or the total number of patches that can be\n extracted.\n\nExamples\n--------\n>>> from sklearn.datasets import load_sample_image\n>>> from sklearn.feature_extraction import image\n>>> # Use the array data from the first image in this dataset:\n>>> one_image = load_sample_image(\"china.jpg\")\n>>> print('Image shape: {}'.format(one_image.shape))\nImage shape: (427, 640, 3)\n>>> patches = image.extract_patches_2d(one_image, (2, 2))\n>>> print('Patches shape: {}'.format(patches.shape))\nPatches shape: (272214, 2, 2, 3)\n>>> # Here are just two of these patches:\n>>> print(patches[1])\n[[[174 201 231]\n [174 201 231]]\n [[173 200 230]\n [173 200 230]]]\n>>> print(patches[800])\n[[[187 214 243]\n [188 215 244]]\n [[187 214 243]\n [188 215 244]]]" - }, - { - 
"name": "reconstruct_from_patches_2d", - "decorators": [], - "parameters": [ - { - "name": "patches", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The complete set of patches. If the patches contain colour information, channels are indexed along the last dimension: RGB patches would have `n_channels=3`." - }, - { - "name": "image_size", - "type": "Tuple[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The size of the image that will be reconstructed." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reconstruct the image from all of its patches.\n\nPatches are assumed to overlap and the image is constructed by filling in\nthe patches from left to right, top to bottom, averaging the overlapping\nregions.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npatches : ndarray of shape (n_patches, patch_height, patch_width) or (n_patches, patch_height, patch_width, n_channels)\n The complete set of patches. If the patches contain colour information,\n channels are indexed along the last dimension: RGB patches would\n have `n_channels=3`.\n\nimage_size : tuple of int (image_height, image_width) or (image_height, image_width, n_channels)\n The size of the image that will be reconstructed.\n\nReturns\n-------\nimage : ndarray of shape image_size\n The reconstructed image." 
- } - ] - }, - { - "name": "sklearn.feature_extraction.setup", - "imports": [ - "import os", - "import platform", - "import numpy", - "from numpy.distutils.misc_util import Configuration" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_extraction.text", - "imports": [ - "import array", - "from collections import defaultdict", - "from collections.abc import Mapping", - "from functools import partial", - "import numbers", - "from operator import itemgetter", - "import re", - "import unicodedata", - "import warnings", - "import numpy as np", - "import scipy.sparse as sp", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from preprocessing import normalize", - "from _hash import FeatureHasher", - "from _stop_words import ENGLISH_STOP_WORDS", - "from utils.validation import check_is_fitted", - "from utils.validation import check_array", - "from utils.validation import FLOAT_DTYPES", - "from utils import _IS_32BIT", - "from utils.fixes import _astype_copy_false", - "from exceptions import NotFittedError", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "_VectorizerMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decode", - "decorators": [], - "parameters": [ - { - "name": "doc", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The string to decode." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Decode the input into a string of unicode symbols.\n\nThe decoding strategy depends on the vectorizer parameters.\n\nParameters\n----------\ndoc : str\n The string to decode.\n\nReturns\n-------\ndoc: str\n A string of unicode symbols." - }, - { - "name": "_word_ngrams", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Turn tokens into a sequence of n-grams after stop words filtering" - }, - { - "name": "_char_ngrams", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Tokenize text_document into a sequence of character n-grams" - }, - { - "name": "_char_wb_ngrams", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Whitespace sensitive char-n-gram tokenization.\n\nTokenize text_document into a sequence of character n-grams\noperating only inside word boundaries. n-grams at the edges\nof words are padded with space." - }, - { - "name": "build_preprocessor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Return a function to preprocess the text before tokenization.\n\nReturns\n-------\npreprocessor: callable\n A function to preprocess the text before tokenization." - }, - { - "name": "build_tokenizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Return a function that splits a string into a sequence of tokens.\n\nReturns\n-------\ntokenizer: callable\n A function to split a string into a sequence of tokens." - }, - { - "name": "get_stop_words", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Build or fetch the effective stop words list.\n\nReturns\n-------\nstop_words: list or None\n A list of stop words." 
- }, - { - "name": "_check_stop_words_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Check if stop words are consistent\n\nReturns\n-------\nis_consistent : True if stop words are consistent with the preprocessor\n and tokenizer, False if they are not, None if the check\n was previously performed, \"error\" if it could not be\n performed (e.g. because of the use of a custom\n preprocessor / tokenizer)" - }, - { - "name": "build_analyzer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Return a callable that handles preprocessing, tokenization\nand n-grams generation.\n\nReturns\n-------\nanalyzer: callable\n A function to handle preprocessing, tokenization\n and n-grams generation." - }, - { - "name": "_validate_vocabulary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_vocabulary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if vocabulary is empty or missing (not fitted)" - }, - { - "name": "_validate_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check validity of ngram_range parameter" - }, - { - "name": "_warn_for_unused_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Provides common code for text vectorizers (tokenization logic)." 
- }, - { - "name": "HashingVectorizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "input", - "type": "Union[Literal['filename', 'file', 'content'], str]", - "hasDefault": true, - "default": "'content'", - "limitation": null, - "ignored": false, - "docstring": "If 'filename', the sequence passed as an argument to fit is expected to be a list of filenames that need reading to fetch the raw content to analyze. If 'file', the sequence items must have a 'read' method (file-like object) that is called to fetch the bytes in memory. Otherwise the input is expected to be a sequence of items that can be of type string or byte." - }, - { - "name": "encoding", - "type": "str", - "hasDefault": true, - "default": "'utf-8'", - "limitation": null, - "ignored": false, - "docstring": "If bytes or files are given to analyze, this encoding is used to decode." - }, - { - "name": "decode_error", - "type": "Literal['strict', 'ignore', 'replace']", - "hasDefault": true, - "default": "'strict'", - "limitation": null, - "ignored": false, - "docstring": "Instruction on what to do if a byte sequence is given to analyze that contains characters not of the given `encoding`. By default, it is 'strict', meaning that a UnicodeDecodeError will be raised. Other values are 'ignore' and 'replace'." - }, - { - "name": "strip_accents", - "type": "Literal['ascii', 'unicode']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Remove accents and perform other character normalization during the preprocessing step. 'ascii' is a fast method that only works on characters that have an direct ASCII mapping. 'unicode' is a slightly slower method that works on any characters. None (default) does nothing. Both 'ascii' and 'unicode' use NFKD normalization from :func:`unicodedata.normalize`." 
- }, - { - "name": "lowercase", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Convert all characters to lowercase before tokenizing." - }, - { - "name": "preprocessor", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Override the preprocessing (string transformation) stage while preserving the tokenizing and n-grams generation steps. Only applies if ``analyzer is not callable``." - }, - { - "name": "tokenizer", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Override the string tokenization step while preserving the preprocessing and n-grams generation steps. Only applies if ``analyzer == 'word'``." - }, - { - "name": "stop_words", - "type": "Union[Literal['english'], str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If 'english', a built-in stop word list for English is used. There are several known issues with 'english' and you should consider an alternative (see :ref:`stop_words`). If a list, that list is assumed to contain stop words, all of which will be removed from the resulting tokens. Only applies if ``analyzer == 'word'``." - }, - { - "name": "token_pattern", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regular expression denoting what constitutes a \"token\", only used if ``analyzer == 'word'``. The default regexp selects tokens of 2 or more alphanumeric characters (punctuation is completely ignored and always treated as a token separator). If there is a capturing group in token_pattern then the captured group content, not the entire match, becomes the token. At most one capturing group is permitted." 
- }, - { - "name": "ngram_range", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper boundary of the range of n-values for different n-grams to be extracted. All values of n such that min_n <= n <= max_n will be used. For example an ``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means only bigrams. Only applies if ``analyzer is not callable``." - }, - { - "name": "analyzer", - "type": "Literal['word', 'char', 'char_wb']", - "hasDefault": true, - "default": "'word'", - "limitation": null, - "ignored": false, - "docstring": "Whether the feature should be made of word or character n-grams. Option 'char_wb' creates character n-grams only from text inside word boundaries; n-grams at the edges of words are padded with space. If a callable is passed it is used to extract the sequence of features out of the raw, unprocessed input. .. versionchanged:: 0.21 Since v0.21, if ``input`` is ``filename`` or ``file``, the data is first read from the file and then passed to the given callable analyzer." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features (columns) in the output matrices. Small numbers of features are likely to cause hash collisions, but large numbers will cause larger coefficient dimensions in linear learners." - }, - { - "name": "binary", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, all non zero counts are set to 1. This is useful for discrete probabilistic models that model binary events rather than integer counts." - }, - { - "name": "norm", - "type": "Literal['l1', 'l2']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "Norm used to normalize term vectors. 
None for no normalization." - }, - { - "name": "alternate_sign", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "When True, an alternating sign is added to the features as to approximately conserve the inner product in the hashed space even for small n_features. This approach is similar to sparse random projection. .. versionadded:: 0.19" - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Type of the matrix returned by fit_transform() or transform()." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Does nothing: this transformer is stateless.\n\nThis method is just there to mark the fact that this transformer\ncan work in a streaming setup.\n\nParameters\n----------\nX : ndarray of shape [n_samples, n_features]\n Training data." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Does nothing: this transformer is stateless.\n\nParameters\n----------\nX : ndarray of shape [n_samples, n_features]\n Training data." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples. 
Each sample must be a text document (either bytes or unicode strings, file name or file object depending on the constructor argument) which will be tokenized and hashed." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform a sequence of documents to a document-term matrix.\n\nParameters\n----------\nX : iterable over raw text documents, length = n_samples\n Samples. Each sample must be a text document (either bytes or\n unicode strings, file name or file object depending on the\n constructor argument) which will be tokenized and hashed.\n\nReturns\n-------\nX : sparse matrix of shape (n_samples, n_features)\n Document-term matrix." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples. Each sample must be a text document (either bytes or unicode strings, file name or file object depending on the constructor argument) which will be tokenized and hashed." - }, - { - "name": "y", - "type": "Any", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored. This parameter exists only for compatibility with sklearn.pipeline.Pipeline." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform a sequence of documents to a document-term matrix.\n\nParameters\n----------\nX : iterable over raw text documents, length = n_samples\n Samples. Each sample must be a text document (either bytes or\n unicode strings, file name or file object depending on the\n constructor argument) which will be tokenized and hashed.\ny : any\n Ignored. This parameter exists only for compatibility with\n sklearn.pipeline.Pipeline.\n\nReturns\n-------\nX : sparse matrix of shape (n_samples, n_features)\n Document-term matrix." 
- }, - { - "name": "_get_hasher", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Convert a collection of text documents to a matrix of token occurrences\n\nIt turns a collection of text documents into a scipy.sparse matrix holding\ntoken occurrence counts (or binary occurrence information), possibly\nnormalized as token frequencies if norm='l1' or projected on the euclidean\nunit sphere if norm='l2'.\n\nThis text vectorizer implementation uses the hashing trick to find the\ntoken string name to feature integer index mapping.\n\nThis strategy has several advantages:\n\n- it is very low memory scalable to large datasets as there is no need to\n store a vocabulary dictionary in memory\n\n- it is fast to pickle and un-pickle as it holds no state besides the\n constructor parameters\n\n- it can be used in a streaming (partial fit) or parallel pipeline as there\n is no state computed during fit.\n\nThere are also a couple of cons (vs using a CountVectorizer with an\nin-memory vocabulary):\n\n- there is no way to compute the inverse transform (from feature indices to\n string feature names) which can be a problem when trying to introspect\n which features are most important to a model.\n\n- there can be collisions: distinct tokens can be mapped to the same\n feature index. However in practice this is rarely an issue if n_features\n is large enough (e.g. 
2 ** 18 for text classification problems).\n\n- no IDF weighting as this would render the transformer stateful.\n\nThe hash function employed is the signed 32-bit version of Murmurhash3.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\ninput : string {'filename', 'file', 'content'}, default='content'\n If 'filename', the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n If 'file', the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n Otherwise the input is expected to be a sequence of items that\n can be of type string or byte.\n\nencoding : string, default='utf-8'\n If bytes or files are given to analyze, this encoding is used to\n decode.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. By default, it is\n 'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n values are 'ignore' and 'replace'.\n\nstrip_accents : {'ascii', 'unicode'}, default=None\n Remove accents and perform other character normalization\n during the preprocessing step.\n 'ascii' is a fast method that only works on characters that have\n an direct ASCII mapping.\n 'unicode' is a slightly slower method that works on any characters.\n None (default) does nothing.\n\n Both 'ascii' and 'unicode' use NFKD normalization from\n :func:`unicodedata.normalize`.\n\nlowercase : bool, default=True\n Convert all characters to lowercase before tokenizing.\n\npreprocessor : callable, default=None\n Override the preprocessing (string transformation) stage while\n preserving the tokenizing and n-grams generation steps.\n Only applies if ``analyzer is not callable``.\n\ntokenizer : callable, default=None\n Override the string tokenization step while preserving the\n preprocessing and n-grams generation steps.\n Only applies if ``analyzer == 'word'``.\n\nstop_words : string {'english'}, list, default=None\n If 'english', a built-in stop word list for English is used.\n There are several known issues with 'english' and you should\n consider an alternative (see :ref:`stop_words`).\n\n If a list, that list is assumed to contain stop words, all of which\n will be removed from the resulting tokens.\n Only applies if ``analyzer == 'word'``.\n\ntoken_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n Regular expression denoting what constitutes a \"token\", only used\n if ``analyzer == 'word'``. The default regexp selects tokens of 2\n or more alphanumeric characters (punctuation is completely ignored\n and always treated as a token separator).\n\n If there is a capturing group in token_pattern then the\n captured group content, not the entire match, becomes the token.\n At most one capturing group is permitted.\n\nngram_range : tuple (min_n, max_n), default=(1, 1)\n The lower and upper boundary of the range of n-values for different\n n-grams to be extracted. 
All values of n such that min_n <= n <= max_n\n will be used. For example an ``ngram_range`` of ``(1, 1)`` means only\n unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\n only bigrams.\n Only applies if ``analyzer is not callable``.\n\nanalyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n Whether the feature should be made of word or character n-grams.\n Option 'char_wb' creates character n-grams only from text inside\n word boundaries; n-grams at the edges of words are padded with space.\n\n If a callable is passed it is used to extract the sequence of features\n out of the raw, unprocessed input.\n\n .. versionchanged:: 0.21\n\n Since v0.21, if ``input`` is ``filename`` or ``file``, the data is\n first read from the file and then passed to the given callable\n analyzer.\n\nn_features : int, default=(2 ** 20)\n The number of features (columns) in the output matrices. Small numbers\n of features are likely to cause hash collisions, but large numbers\n will cause larger coefficient dimensions in linear learners.\n\nbinary : bool, default=False.\n If True, all non zero counts are set to 1. This is useful for discrete\n probabilistic models that model binary events rather than integer\n counts.\n\nnorm : {'l1', 'l2'}, default='l2'\n Norm used to normalize term vectors. None for no normalization.\n\nalternate_sign : bool, default=True\n When True, an alternating sign is added to the features as to\n approximately conserve the inner product in the hashed space even for\n small n_features. This approach is similar to sparse random projection.\n\n .. versionadded:: 0.19\n\ndtype : type, default=np.float64\n Type of the matrix returned by fit_transform() or transform().\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import HashingVectorizer\n>>> corpus = [\n... 'This is the first document.',\n... 'This document is the second document.',\n... 'And this is the third one.',\n... 'Is this the first document?',\n... 
]\n>>> vectorizer = HashingVectorizer(n_features=2**4)\n>>> X = vectorizer.fit_transform(corpus)\n>>> print(X.shape)\n(4, 16)\n\nSee Also\n--------\nCountVectorizer, TfidfVectorizer" - }, - { - "name": "CountVectorizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "input", - "type": "Union[Literal['filename', 'file', 'content'], str]", - "hasDefault": true, - "default": "'content'", - "limitation": null, - "ignored": false, - "docstring": "If 'filename', the sequence passed as an argument to fit is expected to be a list of filenames that need reading to fetch the raw content to analyze. If 'file', the sequence items must have a 'read' method (file-like object) that is called to fetch the bytes in memory. Otherwise the input is expected to be a sequence of items that can be of type string or byte." - }, - { - "name": "encoding", - "type": "str", - "hasDefault": true, - "default": "'utf-8'", - "limitation": null, - "ignored": false, - "docstring": "If bytes or files are given to analyze, this encoding is used to decode." - }, - { - "name": "decode_error", - "type": "Literal['strict', 'ignore', 'replace']", - "hasDefault": true, - "default": "'strict'", - "limitation": null, - "ignored": false, - "docstring": "Instruction on what to do if a byte sequence is given to analyze that contains characters not of the given `encoding`. By default, it is 'strict', meaning that a UnicodeDecodeError will be raised. Other values are 'ignore' and 'replace'." - }, - { - "name": "strip_accents", - "type": "Literal['ascii', 'unicode']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Remove accents and perform other character normalization during the preprocessing step. 'ascii' is a fast method that only works on characters that have an direct ASCII mapping. 'unicode' is a slightly slower method that works on any characters. 
None (default) does nothing. Both 'ascii' and 'unicode' use NFKD normalization from :func:`unicodedata.normalize`." - }, - { - "name": "lowercase", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Convert all characters to lowercase before tokenizing." - }, - { - "name": "preprocessor", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Override the preprocessing (strip_accents and lowercase) stage while preserving the tokenizing and n-grams generation steps. Only applies if ``analyzer is not callable``." - }, - { - "name": "tokenizer", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Override the string tokenization step while preserving the preprocessing and n-grams generation steps. Only applies if ``analyzer == 'word'``." - }, - { - "name": "stop_words", - "type": "Union[Literal['english'], str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If 'english', a built-in stop word list for English is used. There are several known issues with 'english' and you should consider an alternative (see :ref:`stop_words`). If a list, that list is assumed to contain stop words, all of which will be removed from the resulting tokens. Only applies if ``analyzer == 'word'``. If None, no stop words will be used. max_df can be set to a value in the range [0.7, 1.0) to automatically detect and filter stop words based on intra corpus document frequency of terms." - }, - { - "name": "token_pattern", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regular expression denoting what constitutes a \"token\", only used if ``analyzer == 'word'``. 
The default regexp select tokens of 2 or more alphanumeric characters (punctuation is completely ignored and always treated as a token separator). If there is a capturing group in token_pattern then the captured group content, not the entire match, becomes the token. At most one capturing group is permitted." - }, - { - "name": "ngram_range", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper boundary of the range of n-values for different word n-grams or char n-grams to be extracted. All values of n such such that min_n <= n <= max_n will be used. For example an ``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means only bigrams. Only applies if ``analyzer is not callable``." - }, - { - "name": "analyzer", - "type": "Literal['word', 'char', 'char_wb']", - "hasDefault": true, - "default": "'word'", - "limitation": null, - "ignored": false, - "docstring": "Whether the feature should be made of word n-gram or character n-grams. Option 'char_wb' creates character n-grams only from text inside word boundaries; n-grams at the edges of words are padded with space. If a callable is passed it is used to extract the sequence of features out of the raw, unprocessed input. .. versionchanged:: 0.21 Since v0.21, if ``input`` is ``filename`` or ``file``, the data is first read from the file and then passed to the given callable analyzer." - }, - { - "name": "max_df", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "When building the vocabulary ignore terms that have a document frequency strictly higher than the given threshold (corpus-specific stop words). If float, the parameter represents a proportion of documents, integer absolute counts. This parameter is ignored if vocabulary is not None." 
- }, - { - "name": "min_df", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "When building the vocabulary ignore terms that have a document frequency strictly lower than the given threshold. This value is also called cut-off in the literature. If float, the parameter represents a proportion of documents, integer absolute counts. This parameter is ignored if vocabulary is not None." - }, - { - "name": "max_features", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, build a vocabulary that only consider the top max_features ordered by term frequency across the corpus. This parameter is ignored if vocabulary is not None." - }, - { - "name": "vocabulary", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Either a Mapping (e.g., a dict) where keys are terms and values are indices in the feature matrix, or an iterable over terms. If not given, a vocabulary is determined from the input documents. Indices in the mapping should not be repeated and should not have any gap between 0 and the largest index." - }, - { - "name": "binary", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, all non zero counts are set to 1. This is useful for discrete probabilistic models that model binary events rather than integer counts." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Type of the matrix returned by fit_transform() or transform()." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sort_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sort features by name\n\nReturns a reordered matrix and modifies the vocabulary in place" - }, - { - "name": "_limit_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Remove too rare or too common features.\n\nPrune features that are non zero in more samples than high or less\ndocuments than low, modifying the vocabulary, and restricting it to\nat most the limit most frequent.\n\nThis does not prune samples with zero features." - }, - { - "name": "_count_vocab", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Create sparse feature matrix, and vocabulary where fixed_vocab=False\n " - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "raw_documents", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An iterable which yields either str, unicode or file objects." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn a vocabulary dictionary of all tokens in the raw documents.\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\nReturns\n-------\nself" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "raw_documents", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An iterable which yields either str, unicode or file objects." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Learn the vocabulary dictionary and return document-term matrix.\n\nThis is equivalent to fit followed by transform, but more efficiently\nimplemented.\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\nReturns\n-------\nX : array of shape (n_samples, n_features)\n Document-term matrix." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "raw_documents", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An iterable which yields either str, unicode or file objects." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform documents to document-term matrix.\n\nExtract token counts out of raw text documents using the vocabulary\nfitted with fit or the one provided to the constructor.\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\nReturns\n-------\nX : sparse matrix of shape (n_samples, n_features)\n Document-term matrix." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document-term matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return terms per document with nonzero entries in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document-term matrix.\n\nReturns\n-------\nX_inv : list of arrays of shape (n_samples,)\n List of arrays of terms." 
- }, - { - "name": "get_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Array mapping from feature integer indices to feature name.\n\nReturns\n-------\nfeature_names : list\n A list of feature names." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Convert a collection of text documents to a matrix of token counts\n\nThis implementation produces a sparse representation of the counts using\nscipy.sparse.csr_matrix.\n\nIf you do not provide an a-priori dictionary and you do not use an analyzer\nthat does some kind of feature selection then the number of features will\nbe equal to the vocabulary size found by analyzing the data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ninput : string {'filename', 'file', 'content'}, default='content'\n If 'filename', the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n If 'file', the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n Otherwise the input is expected to be a sequence of items that\n can be of type string or byte.\n\nencoding : string, default='utf-8'\n If bytes or files are given to analyze, this encoding is used to\n decode.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. By default, it is\n 'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n values are 'ignore' and 'replace'.\n\nstrip_accents : {'ascii', 'unicode'}, default=None\n Remove accents and perform other character normalization\n during the preprocessing step.\n 'ascii' is a fast method that only works on characters that have\n an direct ASCII mapping.\n 'unicode' is a slightly slower method that works on any characters.\n None (default) does nothing.\n\n Both 'ascii' and 'unicode' use NFKD normalization from\n :func:`unicodedata.normalize`.\n\nlowercase : bool, default=True\n Convert all characters to lowercase before tokenizing.\n\npreprocessor : callable, default=None\n Override the preprocessing (strip_accents and lowercase) stage while\n preserving the tokenizing and n-grams generation steps.\n Only applies if ``analyzer is not callable``.\n\ntokenizer : callable, default=None\n Override the string tokenization step while preserving the\n preprocessing and n-grams generation steps.\n Only applies if ``analyzer == 'word'``.\n\nstop_words : string {'english'}, list, default=None\n If 'english', a built-in stop word list for English is used.\n There are several known issues with 'english' and you should\n consider an alternative (see :ref:`stop_words`).\n\n If a list, that list is assumed to contain stop words, all of which\n will be removed from the resulting tokens.\n Only applies if ``analyzer == 'word'``.\n\n If None, no stop words will be used. max_df can be set to a value\n in the range [0.7, 1.0) to automatically detect and filter stop\n words based on intra corpus document frequency of terms.\n\ntoken_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n Regular expression denoting what constitutes a \"token\", only used\n if ``analyzer == 'word'``. 
The default regexp select tokens of 2\n or more alphanumeric characters (punctuation is completely ignored\n and always treated as a token separator).\n\n If there is a capturing group in token_pattern then the\n captured group content, not the entire match, becomes the token.\n At most one capturing group is permitted.\n\nngram_range : tuple (min_n, max_n), default=(1, 1)\n The lower and upper boundary of the range of n-values for different\n word n-grams or char n-grams to be extracted. All values of n such\n such that min_n <= n <= max_n will be used. For example an\n ``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means\n unigrams and bigrams, and ``(2, 2)`` means only bigrams.\n Only applies if ``analyzer is not callable``.\n\nanalyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n Whether the feature should be made of word n-gram or character\n n-grams.\n Option 'char_wb' creates character n-grams only from text inside\n word boundaries; n-grams at the edges of words are padded with space.\n\n If a callable is passed it is used to extract the sequence of features\n out of the raw, unprocessed input.\n\n .. versionchanged:: 0.21\n\n Since v0.21, if ``input`` is ``filename`` or ``file``, the data is\n first read from the file and then passed to the given callable\n analyzer.\n\nmax_df : float in range [0.0, 1.0] or int, default=1.0\n When building the vocabulary ignore terms that have a document\n frequency strictly higher than the given threshold (corpus-specific\n stop words).\n If float, the parameter represents a proportion of documents, integer\n absolute counts.\n This parameter is ignored if vocabulary is not None.\n\nmin_df : float in range [0.0, 1.0] or int, default=1\n When building the vocabulary ignore terms that have a document\n frequency strictly lower than the given threshold. 
This value is also\n called cut-off in the literature.\n If float, the parameter represents a proportion of documents, integer\n absolute counts.\n This parameter is ignored if vocabulary is not None.\n\nmax_features : int, default=None\n If not None, build a vocabulary that only consider the top\n max_features ordered by term frequency across the corpus.\n\n This parameter is ignored if vocabulary is not None.\n\nvocabulary : Mapping or iterable, default=None\n Either a Mapping (e.g., a dict) where keys are terms and values are\n indices in the feature matrix, or an iterable over terms. If not\n given, a vocabulary is determined from the input documents. Indices\n in the mapping should not be repeated and should not have any gap\n between 0 and the largest index.\n\nbinary : bool, default=False\n If True, all non zero counts are set to 1. This is useful for discrete\n probabilistic models that model binary events rather than integer\n counts.\n\ndtype : type, default=np.int64\n Type of the matrix returned by fit_transform() or transform().\n\nAttributes\n----------\nvocabulary_ : dict\n A mapping of terms to feature indices.\n\nfixed_vocabulary_: boolean\n True if a fixed vocabulary of term to indices mapping\n is provided by the user\n\nstop_words_ : set\n Terms that were ignored because they either:\n\n - occurred in too many documents (`max_df`)\n - occurred in too few documents (`min_df`)\n - were cut off by feature selection (`max_features`).\n\n This is only available if no vocabulary was given.\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import CountVectorizer\n>>> corpus = [\n... 'This is the first document.',\n... 'This document is the second document.',\n... 'And this is the third one.',\n... 'Is this the first document?',\n... 
]\n>>> vectorizer = CountVectorizer()\n>>> X = vectorizer.fit_transform(corpus)\n>>> print(vectorizer.get_feature_names())\n['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']\n>>> print(X.toarray())\n[[0 1 1 1 0 0 1 0 1]\n [0 2 0 1 0 1 1 0 1]\n [1 0 0 1 1 0 1 1 1]\n [0 1 1 1 0 0 1 0 1]]\n>>> vectorizer2 = CountVectorizer(analyzer='word', ngram_range=(2, 2))\n>>> X2 = vectorizer2.fit_transform(corpus)\n>>> print(vectorizer2.get_feature_names())\n['and this', 'document is', 'first document', 'is the', 'is this',\n'second document', 'the first', 'the second', 'the third', 'third one',\n 'this document', 'this is', 'this the']\n >>> print(X2.toarray())\n [[0 0 1 1 0 0 1 0 0 0 0 1 0]\n [0 1 0 1 0 1 0 1 0 0 1 0 0]\n [1 0 0 1 0 0 0 0 1 1 0 1 0]\n [0 0 1 0 1 0 1 0 0 0 0 0 1]]\n\nSee Also\n--------\nHashingVectorizer, TfidfVectorizer\n\nNotes\n-----\nThe ``stop_words_`` attribute can get large and increase the model size\nwhen pickling. This attribute is provided only for introspection and can\nbe safely removed using delattr or set to None before pickling." - }, - { - "name": "TfidfTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "norm", - "type": "Literal['l1', 'l2']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "Each output row will have unit norm, either: * 'l2': Sum of squares of vector elements is 1. The cosine similarity between two vectors is their dot product when l2 norm has been applied. * 'l1': Sum of absolute values of vector elements is 1. See :func:`preprocessing.normalize`" - }, - { - "name": "use_idf", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Enable inverse-document-frequency reweighting." 
- }, - { - "name": "smooth_idf", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Smooth idf weights by adding one to document frequencies, as if an extra document was seen containing every term in the collection exactly once. Prevents zero divisions." - }, - { - "name": "sublinear_tf", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A matrix of term/token counts." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Learn the idf vector (global term weights).\n\nParameters\n----------\nX : sparse matrix of shape n_samples, n_features)\n A matrix of term/token counts." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "a matrix of term/token counts" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy X and operate on the copy or perform in-place operations." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform a count matrix to a tf or tf-idf representation\n\nParameters\n----------\nX : sparse matrix of (n_samples, n_features)\n a matrix of term/token counts\n\ncopy : bool, default=True\n Whether to copy X and operate on the copy or perform in-place\n operations.\n\nReturns\n-------\nvectors : sparse matrix of shape (n_samples, n_features)" - }, - { - "name": "idf_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transform a count matrix to a normalized tf or tf-idf representation\n\nTf means term-frequency while tf-idf means term-frequency times inverse\ndocument-frequency. This is a common term weighting scheme in information\nretrieval, that has also found good use in document classification.\n\nThe goal of using tf-idf instead of the raw frequencies of occurrence of a\ntoken in a given document is to scale down the impact of tokens that occur\nvery frequently in a given corpus and that are hence empirically less\ninformative than features that occur in a small fraction of the training\ncorpus.\n\nThe formula that is used to compute the tf-idf for a term t of a document d\nin a document set is tf-idf(t, d) = tf(t, d) * idf(t), and the idf is\ncomputed as idf(t) = log [ n / df(t) ] + 1 (if ``smooth_idf=False``), where\nn is the total number of documents in the document set and df(t) is the\ndocument frequency of t; the document frequency is the number of documents\nin the document set that contain the term t. 
The effect of adding \"1\" to\nthe idf in the equation above is that terms with zero idf, i.e., terms\nthat occur in all documents in a training set, will not be entirely\nignored.\n(Note that the idf formula above differs from the standard textbook\nnotation that defines the idf as\nidf(t) = log [ n / (df(t) + 1) ]).\n\nIf ``smooth_idf=True`` (the default), the constant \"1\" is added to the\nnumerator and denominator of the idf as if an extra document was seen\ncontaining every term in the collection exactly once, which prevents\nzero divisions: idf(t) = log [ (1 + n) / (1 + df(t)) ] + 1.\n\nFurthermore, the formulas used to compute tf and idf depend\non parameter settings that correspond to the SMART notation used in IR\nas follows:\n\nTf is \"n\" (natural) by default, \"l\" (logarithmic) when\n``sublinear_tf=True``.\nIdf is \"t\" when use_idf is given, \"n\" (none) otherwise.\nNormalization is \"c\" (cosine) when ``norm='l2'``, \"n\" (none)\nwhen ``norm=None``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nnorm : {'l1', 'l2'}, default='l2'\n Each output row will have unit norm, either:\n * 'l2': Sum of squares of vector elements is 1. The cosine\n similarity between two vectors is their dot product when l2 norm has\n been applied.\n * 'l1': Sum of absolute values of vector elements is 1.\n See :func:`preprocessing.normalize`\n\nuse_idf : bool, default=True\n Enable inverse-document-frequency reweighting.\n\nsmooth_idf : bool, default=True\n Smooth idf weights by adding one to document frequencies, as if an\n extra document was seen containing every term in the collection\n exactly once. Prevents zero divisions.\n\nsublinear_tf : bool, default=False\n Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf).\n\nAttributes\n----------\nidf_ : array of shape (n_features)\n The inverse document frequency (IDF) vector; only defined\n if ``use_idf`` is True.\n\n .. 
versionadded:: 0.20\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import TfidfTransformer\n>>> from sklearn.feature_extraction.text import CountVectorizer\n>>> from sklearn.pipeline import Pipeline\n>>> import numpy as np\n>>> corpus = ['this is the first document',\n... 'this document is the second document',\n... 'and this is the third one',\n... 'is this the first document']\n>>> vocabulary = ['this', 'document', 'first', 'is', 'second', 'the',\n... 'and', 'one']\n>>> pipe = Pipeline([('count', CountVectorizer(vocabulary=vocabulary)),\n... ('tfid', TfidfTransformer())]).fit(corpus)\n>>> pipe['count'].transform(corpus).toarray()\narray([[1, 1, 1, 1, 0, 1, 0, 0],\n [1, 2, 0, 1, 1, 1, 0, 0],\n [1, 0, 0, 1, 0, 1, 1, 1],\n [1, 1, 1, 1, 0, 1, 0, 0]])\n>>> pipe['tfid'].idf_\narray([1. , 1.22314355, 1.51082562, 1. , 1.91629073,\n 1. , 1.91629073, 1.91629073])\n>>> pipe.transform(corpus).shape\n(4, 8)\n\nReferences\n----------\n\n.. [Yates2011] R. Baeza-Yates and B. Ribeiro-Neto (2011). Modern\n Information Retrieval. Addison Wesley, pp. 68-74.\n\n.. [MRS2008] C.D. Manning, P. Raghavan and H. Sch\u00fctze (2008).\n Introduction to Information Retrieval. Cambridge University\n Press, pp. 118-120." - }, - { - "name": "TfidfVectorizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "input", - "type": "Literal['filename', 'file', 'content']", - "hasDefault": true, - "default": "'content'", - "limitation": null, - "ignored": false, - "docstring": "If 'filename', the sequence passed as an argument to fit is expected to be a list of filenames that need reading to fetch the raw content to analyze. If 'file', the sequence items must have a 'read' method (file-like object) that is called to fetch the bytes in memory. Otherwise the input is expected to be a sequence of items that can be of type string or byte." 
- }, - { - "name": "encoding", - "type": "str", - "hasDefault": true, - "default": "'utf-8'", - "limitation": null, - "ignored": false, - "docstring": "If bytes or files are given to analyze, this encoding is used to decode." - }, - { - "name": "decode_error", - "type": "Literal['strict', 'ignore', 'replace']", - "hasDefault": true, - "default": "'strict'", - "limitation": null, - "ignored": false, - "docstring": "Instruction on what to do if a byte sequence is given to analyze that contains characters not of the given `encoding`. By default, it is 'strict', meaning that a UnicodeDecodeError will be raised. Other values are 'ignore' and 'replace'." - }, - { - "name": "strip_accents", - "type": "Literal['ascii', 'unicode']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Remove accents and perform other character normalization during the preprocessing step. 'ascii' is a fast method that only works on characters that have an direct ASCII mapping. 'unicode' is a slightly slower method that works on any characters. None (default) does nothing. Both 'ascii' and 'unicode' use NFKD normalization from :func:`unicodedata.normalize`." - }, - { - "name": "lowercase", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Convert all characters to lowercase before tokenizing." - }, - { - "name": "preprocessor", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Override the preprocessing (string transformation) stage while preserving the tokenizing and n-grams generation steps. Only applies if ``analyzer is not callable``." - }, - { - "name": "tokenizer", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Override the string tokenization step while preserving the preprocessing and n-grams generation steps. 
Only applies if ``analyzer == 'word'``." - }, - { - "name": "analyzer", - "type": "Literal['word', 'char', 'char_wb']", - "hasDefault": true, - "default": "'word'", - "limitation": null, - "ignored": false, - "docstring": "Whether the feature should be made of word or character n-grams. Option 'char_wb' creates character n-grams only from text inside word boundaries; n-grams at the edges of words are padded with space. If a callable is passed it is used to extract the sequence of features out of the raw, unprocessed input. .. versionchanged:: 0.21 Since v0.21, if ``input`` is ``filename`` or ``file``, the data is first read from the file and then passed to the given callable analyzer." - }, - { - "name": "stop_words", - "type": "Literal['english']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If a string, it is passed to _check_stop_list and the appropriate stop list is returned. 'english' is currently the only supported string value. There are several known issues with 'english' and you should consider an alternative (see :ref:`stop_words`). If a list, that list is assumed to contain stop words, all of which will be removed from the resulting tokens. Only applies if ``analyzer == 'word'``. If None, no stop words will be used. max_df can be set to a value in the range [0.7, 1.0) to automatically detect and filter stop words based on intra corpus document frequency of terms." - }, - { - "name": "token_pattern", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regular expression denoting what constitutes a \"token\", only used if ``analyzer == 'word'``. The default regexp selects tokens of 2 or more alphanumeric characters (punctuation is completely ignored and always treated as a token separator). If there is a capturing group in token_pattern then the captured group content, not the entire match, becomes the token. 
At most one capturing group is permitted." - }, - { - "name": "ngram_range", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper boundary of the range of n-values for different n-grams to be extracted. All values of n such that min_n <= n <= max_n will be used. For example an ``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means only bigrams. Only applies if ``analyzer is not callable``." - }, - { - "name": "max_df", - "type": "Union[int, float]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "When building the vocabulary ignore terms that have a document frequency strictly higher than the given threshold (corpus-specific stop words). If float in range [0.0, 1.0], the parameter represents a proportion of documents, integer absolute counts. This parameter is ignored if vocabulary is not None." - }, - { - "name": "min_df", - "type": "Union[int, float]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "When building the vocabulary ignore terms that have a document frequency strictly lower than the given threshold. This value is also called cut-off in the literature. If float in range of [0.0, 1.0], the parameter represents a proportion of documents, integer absolute counts. This parameter is ignored if vocabulary is not None." - }, - { - "name": "max_features", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, build a vocabulary that only consider the top max_features ordered by term frequency across the corpus. This parameter is ignored if vocabulary is not None." 
- }, - { - "name": "vocabulary", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Either a Mapping (e.g., a dict) where keys are terms and values are indices in the feature matrix, or an iterable over terms. If not given, a vocabulary is determined from the input documents." - }, - { - "name": "binary", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, all non-zero term counts are set to 1. This does not mean outputs will have only 0/1 values, only that the tf term in tf-idf is binary. (Set idf and normalization to False to get 0/1 outputs)." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "float64", - "limitation": null, - "ignored": false, - "docstring": "Type of the matrix returned by fit_transform() or transform()." - }, - { - "name": "norm", - "type": "Literal['l1', 'l2']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "Each output row will have unit norm, either: * 'l2': Sum of squares of vector elements is 1. The cosine similarity between two vectors is their dot product when l2 norm has been applied. * 'l1': Sum of absolute values of vector elements is 1. See :func:`preprocessing.normalize`." - }, - { - "name": "use_idf", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Enable inverse-document-frequency reweighting." - }, - { - "name": "smooth_idf", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Smooth idf weights by adding one to document frequencies, as if an extra document was seen containing every term in the collection exactly once. Prevents zero divisions." 
- }, - { - "name": "sublinear_tf", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "norm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "use_idf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "smooth_idf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "sublinear_tf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "idf_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "raw_documents", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An iterable which yields either str, unicode or file objects." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This parameter is not needed to compute tfidf." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn vocabulary and idf from training set.\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\ny : None\n This parameter is not needed to compute tfidf.\n\nReturns\n-------\nself : object\n Fitted vectorizer." 
- }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "raw_documents", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An iterable which yields either str, unicode or file objects." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn vocabulary and idf, return document-term matrix.\n\nThis is equivalent to fit followed by transform, but more efficiently\nimplemented.\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\ny : None\n This parameter is ignored.\n\nReturns\n-------\nX : sparse matrix of (n_samples, n_features)\n Tf-idf-weighted document-term matrix." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "raw_documents", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An iterable which yields either str, unicode or file objects." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform documents to document-term matrix.\n\nUses the vocabulary and document frequencies (df) learned by fit (or\nfit_transform).\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\nReturns\n-------\nX : sparse matrix of (n_samples, n_features)\n Tf-idf-weighted document-term matrix." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Convert a collection of raw documents to a matrix of TF-IDF features.\n\nEquivalent to :class:`CountVectorizer` followed by\n:class:`TfidfTransformer`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ninput : {'filename', 'file', 'content'}, default='content'\n If 'filename', the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n If 'file', the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n Otherwise the input is expected to be a sequence of items that\n can be of type string or byte.\n\nencoding : str, default='utf-8'\n If bytes or files are given to analyze, this encoding is used to\n decode.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. By default, it is\n 'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n values are 'ignore' and 'replace'.\n\nstrip_accents : {'ascii', 'unicode'}, default=None\n Remove accents and perform other character normalization\n during the preprocessing step.\n 'ascii' is a fast method that only works on characters that have\n an direct ASCII mapping.\n 'unicode' is a slightly slower method that works on any characters.\n None (default) does nothing.\n\n Both 'ascii' and 'unicode' use NFKD normalization from\n :func:`unicodedata.normalize`.\n\nlowercase : bool, default=True\n Convert all characters to lowercase before tokenizing.\n\npreprocessor : callable, default=None\n Override the preprocessing (string transformation) stage while\n preserving the tokenizing and n-grams generation steps.\n Only applies if ``analyzer is not callable``.\n\ntokenizer : callable, default=None\n Override the string tokenization step while preserving the\n preprocessing and n-grams generation steps.\n Only applies if ``analyzer == 'word'``.\n\nanalyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n Whether the feature should be made of word or character n-grams.\n Option 'char_wb' creates character n-grams only from text inside\n word boundaries; n-grams at the edges of words are padded with space.\n\n If a callable is passed it is used to extract the sequence of features\n out of the raw, unprocessed input.\n\n .. versionchanged:: 0.21\n\n Since v0.21, if ``input`` is ``filename`` or ``file``, the data is\n first read from the file and then passed to the given callable\n analyzer.\n\nstop_words : {'english'}, list, default=None\n If a string, it is passed to _check_stop_list and the appropriate stop\n list is returned. 
'english' is currently the only supported string\n value.\n There are several known issues with 'english' and you should\n consider an alternative (see :ref:`stop_words`).\n\n If a list, that list is assumed to contain stop words, all of which\n will be removed from the resulting tokens.\n Only applies if ``analyzer == 'word'``.\n\n If None, no stop words will be used. max_df can be set to a value\n in the range [0.7, 1.0) to automatically detect and filter stop\n words based on intra corpus document frequency of terms.\n\ntoken_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n Regular expression denoting what constitutes a \"token\", only used\n if ``analyzer == 'word'``. The default regexp selects tokens of 2\n or more alphanumeric characters (punctuation is completely ignored\n and always treated as a token separator).\n\n If there is a capturing group in token_pattern then the\n captured group content, not the entire match, becomes the token.\n At most one capturing group is permitted.\n\nngram_range : tuple (min_n, max_n), default=(1, 1)\n The lower and upper boundary of the range of n-values for different\n n-grams to be extracted. All values of n such that min_n <= n <= max_n\n will be used. For example an ``ngram_range`` of ``(1, 1)`` means only\n unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\n only bigrams.\n Only applies if ``analyzer is not callable``.\n\nmax_df : float or int, default=1.0\n When building the vocabulary ignore terms that have a document\n frequency strictly higher than the given threshold (corpus-specific\n stop words).\n If float in range [0.0, 1.0], the parameter represents a proportion of\n documents, integer absolute counts.\n This parameter is ignored if vocabulary is not None.\n\nmin_df : float or int, default=1\n When building the vocabulary ignore terms that have a document\n frequency strictly lower than the given threshold. 
This value is also\n called cut-off in the literature.\n If float in range of [0.0, 1.0], the parameter represents a proportion\n of documents, integer absolute counts.\n This parameter is ignored if vocabulary is not None.\n\nmax_features : int, default=None\n If not None, build a vocabulary that only consider the top\n max_features ordered by term frequency across the corpus.\n\n This parameter is ignored if vocabulary is not None.\n\nvocabulary : Mapping or iterable, default=None\n Either a Mapping (e.g., a dict) where keys are terms and values are\n indices in the feature matrix, or an iterable over terms. If not\n given, a vocabulary is determined from the input documents.\n\nbinary : bool, default=False\n If True, all non-zero term counts are set to 1. This does not mean\n outputs will have only 0/1 values, only that the tf term in tf-idf\n is binary. (Set idf and normalization to False to get 0/1 outputs).\n\ndtype : dtype, default=float64\n Type of the matrix returned by fit_transform() or transform().\n\nnorm : {'l1', 'l2'}, default='l2'\n Each output row will have unit norm, either:\n * 'l2': Sum of squares of vector elements is 1. The cosine\n similarity between two vectors is their dot product when l2 norm has\n been applied.\n * 'l1': Sum of absolute values of vector elements is 1.\n See :func:`preprocessing.normalize`.\n\nuse_idf : bool, default=True\n Enable inverse-document-frequency reweighting.\n\nsmooth_idf : bool, default=True\n Smooth idf weights by adding one to document frequencies, as if an\n extra document was seen containing every term in the collection\n exactly once. Prevents zero divisions.\n\nsublinear_tf : bool, default=False\n Apply sublinear tf scaling, i.e. 
replace tf with 1 + log(tf).\n\nAttributes\n----------\nvocabulary_ : dict\n A mapping of terms to feature indices.\n\nfixed_vocabulary_: bool\n True if a fixed vocabulary of term to indices mapping\n is provided by the user\n\nidf_ : array of shape (n_features,)\n The inverse document frequency (IDF) vector; only defined\n if ``use_idf`` is True.\n\nstop_words_ : set\n Terms that were ignored because they either:\n\n - occurred in too many documents (`max_df`)\n - occurred in too few documents (`min_df`)\n - were cut off by feature selection (`max_features`).\n\n This is only available if no vocabulary was given.\n\nSee Also\n--------\nCountVectorizer : Transforms text into a sparse matrix of n-gram counts.\n\nTfidfTransformer : Performs the TF-IDF transformation from a provided\n matrix of counts.\n\nNotes\n-----\nThe ``stop_words_`` attribute can get large and increase the model size\nwhen pickling. This attribute is provided only for introspection and can\nbe safely removed using delattr or set to None before pickling.\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import TfidfVectorizer\n>>> corpus = [\n... 'This is the first document.',\n... 'This document is the second document.',\n... 'And this is the third one.',\n... 'Is this the first document?',\n... 
]\n>>> vectorizer = TfidfVectorizer()\n>>> X = vectorizer.fit_transform(corpus)\n>>> print(vectorizer.get_feature_names())\n['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']\n>>> print(X.shape)\n(4, 9)" - } - ], - "functions": [ - { - "name": "_preprocess", - "decorators": [], - "parameters": [ - { - "name": "doc: str", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The string to preprocess" - }, - { - "name": "accent_function: callable", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Function for handling accented characters. Common strategies include normalizing and removing." - }, - { - "name": "default=None", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Function for handling accented characters. Common strategies include normalizing and removing." - }, - { - "name": "lower: bool", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to use str.lower to lowercase all fo the text" - }, - { - "name": "default=False", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to use str.lower to lowercase all fo the text" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Chain together an optional series of text preprocessing steps to\napply to a document.\n\nParameters\n----------\ndoc: str\n The string to preprocess\naccent_function: callable, default=None\n Function for handling accented characters. 
Common strategies include\n normalizing and removing.\nlower: bool, default=False\n Whether to use str.lower to lowercase all fo the text\n\nReturns\n-------\ndoc: str\n preprocessed string" - }, - { - "name": "_analyze", - "decorators": [], - "parameters": [ - { - "name": "analyzer: callable", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "default=None", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "tokenizer: callable", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "ngrams: callable", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "preprocessor: callable", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "decoder: callable", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "stop_words: list", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Chain together an optional series of text processing steps to go from\na single document to ngrams, with or without tokenizing or preprocessing.\n\nIf analyzer is used, only the decoder argument is used, as the analyzer is\nintended to replace the preprocessor, tokenizer, and ngrams steps.\n\nParameters\n----------\nanalyzer: callable, default=None\ntokenizer: callable, default=None\nngrams: callable, default=None\npreprocessor: callable, default=None\ndecoder: callable, default=None\nstop_words: list, default=None\n\nReturns\n-------\nngrams: 
list\n A sequence of tokens, possibly with pairs, triples, etc." - }, - { - "name": "strip_accents_unicode", - "decorators": [], - "parameters": [ - { - "name": "s", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The string to strip" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform accentuated unicode symbols into their simple counterpart\n\nWarning: the python-level loop and join operations make this\nimplementation 20 times slower than the strip_accents_ascii basic\nnormalization.\n\nParameters\n----------\ns : string\n The string to strip\n\nSee Also\n--------\nstrip_accents_ascii : Remove accentuated char for any unicode symbol that\n has a direct ASCII equivalent." - }, - { - "name": "strip_accents_ascii", - "decorators": [], - "parameters": [ - { - "name": "s", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The string to strip" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform accentuated unicode symbols into ascii or nothing\n\nWarning: this solution is only suited for languages that have a direct\ntransliteration to ASCII symbols.\n\nParameters\n----------\ns : string\n The string to strip\n\nSee Also\n--------\nstrip_accents_unicode : Remove accentuated char for any unicode symbol." 
- }, - { - "name": "strip_tags", - "decorators": [], - "parameters": [ - { - "name": "s", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The string to strip" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Basic regexp based HTML / XML tag stripper function\n\nFor serious HTML/XML preprocessing you should rather use an external\nlibrary such as lxml or BeautifulSoup.\n\nParameters\n----------\ns : string\n The string to strip" - }, - { - "name": "_check_stop_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_document_frequency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Count the number of non-zero values for each feature in sparse X." - }, - { - "name": "_make_int_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Construct an array.array of a type suitable for scipy.sparse indices." - } - ] - }, - { - "name": "sklearn.feature_extraction._dict_vectorizer", - "imports": [ - "from array import array", - "from collections.abc import Mapping", - "from collections.abc import Iterable", - "from operator import itemgetter", - "from numbers import Number", - "import numpy as np", - "import scipy.sparse as sp", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_array", - "from utils import tosequence", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "DictVectorizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "The type of feature values. 
Passed to Numpy array/scipy.sparse matrix constructors as the dtype argument." - }, - { - "name": "separator", - "type": "str", - "hasDefault": true, - "default": "\"", - "limitation": null, - "ignored": false, - "docstring": "Separator string used when constructing new features for one-hot coding." - }, - { - "name": "sparse", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether transform should produce scipy.sparse matrices." - }, - { - "name": "sort", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether ``feature_names_`` and ``vocabulary_`` should be sorted when fitting." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_add_iterable_element", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Add feature names for iterable of strings" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict(s) or Mapping(s) from feature names (arbitrary Python objects) to feature values (strings or convertible to dtype). .. versionchanged:: 0.24 Accepts multiple string values for one categorical feature." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn a list of feature name -> indices mappings.\n\nParameters\n----------\nX : Mapping or iterable over Mappings\n Dict(s) or Mapping(s) from feature names (arbitrary Python\n objects) to feature values (strings or convertible to dtype).\n\n .. 
versionchanged:: 0.24\n Accepts multiple string values for one categorical feature.\n\ny : (ignored)\n\nReturns\n-------\nself" - }, - { - "name": "_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict(s) or Mapping(s) from feature names (arbitrary Python objects) to feature values (strings or convertible to dtype). .. versionchanged:: 0.24 Accepts multiple string values for one categorical feature." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Learn a list of feature name -> indices mappings and transform X.\n\nLike fit(X) followed by transform(X), but does not require\nmaterializing X in memory.\n\nParameters\n----------\nX : Mapping or iterable over Mappings\n Dict(s) or Mapping(s) from feature names (arbitrary Python\n objects) to feature values (strings or convertible to dtype).\n\n .. versionchanged:: 0.24\n Accepts multiple string values for one categorical feature.\n\ny : (ignored)\n\nReturns\n-------\nXa : {array, sparse matrix}\n Feature vectors; always 2-d." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample matrix." - }, - { - "name": "dict_type", - "type": null, - "hasDefault": true, - "default": "dict", - "limitation": null, - "ignored": false, - "docstring": "Constructor for feature mappings. Must conform to the collections.Mapping API." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform array or sparse matrix X back to feature mappings.\n\nX must have been produced by this DictVectorizer's transform or\nfit_transform method; it may only have passed through transformers\nthat preserve the number of features and their order.\n\nIn the case of one-hot/one-of-K coding, the constructed feature\nnames and values are returned rather than the original ones.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Sample matrix.\ndict_type : type, default=dict\n Constructor for feature mappings. Must conform to the\n collections.Mapping API.\n\nReturns\n-------\nD : list of dict_type objects of shape (n_samples,)\n Feature mappings for the samples in X." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict(s) or Mapping(s) from feature names (arbitrary Python objects) to feature values (strings or convertible to dtype)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform feature->value dicts to array or sparse matrix.\n\nNamed features not encountered during fit or fit_transform will be\nsilently ignored.\n\nParameters\n----------\nX : Mapping or iterable over Mappings of shape (n_samples,)\n Dict(s) or Mapping(s) from feature names (arbitrary Python\n objects) to feature values (strings or convertible to dtype).\n\nReturns\n-------\nXa : {array, sparse matrix}\n Feature vectors; always 2-d." - }, - { - "name": "get_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns a list of feature names, ordered by their indices.\n\nIf one-of-K coding is applied to categorical features, this will\ninclude the constructed feature names but not the original ones." 
- }, - { - "name": "restrict", - "decorators": [], - "parameters": [ - { - "name": "support", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Boolean mask or list of indices (as returned by the get_support member of feature selectors)." - }, - { - "name": "indices", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether support is a list of indices." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Restrict the features to those in support using feature selection.\n\nThis function modifies the estimator in-place.\n\nParameters\n----------\nsupport : array-like\n Boolean mask or list of indices (as returned by the get_support\n member of feature selectors).\nindices : bool, default=False\n Whether support is a list of indices.\n\nReturns\n-------\nself\n\nExamples\n--------\n>>> from sklearn.feature_extraction import DictVectorizer\n>>> from sklearn.feature_selection import SelectKBest, chi2\n>>> v = DictVectorizer()\n>>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]\n>>> X = v.fit_transform(D)\n>>> support = SelectKBest(chi2, k=2).fit(X, [0, 1])\n>>> v.get_feature_names()\n['bar', 'baz', 'foo']\n>>> v.restrict(support.get_support())\nDictVectorizer()\n>>> v.get_feature_names()\n['bar', 'foo']" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transforms lists of feature-value mappings to vectors.\n\nThis transformer turns lists of mappings (dict-like objects) of feature\nnames to feature values into Numpy arrays or scipy.sparse matrices for use\nwith scikit-learn estimators.\n\nWhen feature values are strings, this transformer will do a binary one-hot\n(aka one-of-K) coding: one boolean-valued feature is constructed for each\nof the possible string values that the feature can 
take on. For instance,\na feature \"f\" that can take on the values \"ham\" and \"spam\" will become two\nfeatures in the output, one signifying \"f=ham\", the other \"f=spam\".\n\nIf a feature value is a sequence or set of strings, this transformer\nwill iterate over the values and will count the occurrences of each string\nvalue.\n\nHowever, note that this transformer will only do a binary one-hot encoding\nwhen feature values are of type string. If categorical features are\nrepresented as numeric values such as int or iterables of strings, the\nDictVectorizer can be followed by\n:class:`~sklearn.preprocessing.OneHotEncoder` to complete\nbinary one-hot encoding.\n\nFeatures that do not occur in a sample (mapping) will have a zero value\nin the resulting array/matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndtype : dtype, default=np.float64\n The type of feature values. Passed to Numpy array/scipy.sparse matrix\n constructors as the dtype argument.\nseparator : str, default=\"=\"\n Separator string used when constructing new features for one-hot\n coding.\nsparse : bool, default=True\n Whether transform should produce scipy.sparse matrices.\nsort : bool, default=True\n Whether ``feature_names_`` and ``vocabulary_`` should be\n sorted when fitting.\n\nAttributes\n----------\nvocabulary_ : dict\n A dictionary mapping feature names to feature indices.\n\nfeature_names_ : list\n A list of length n_features containing the feature names (e.g., \"f=ham\"\n and \"f=spam\").\n\nExamples\n--------\n>>> from sklearn.feature_extraction import DictVectorizer\n>>> v = DictVectorizer(sparse=False)\n>>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]\n>>> X = v.fit_transform(D)\n>>> X\narray([[2., 0., 1.],\n [0., 1., 3.]])\n>>> v.inverse_transform(X) == [{'bar': 2.0, 'foo': 1.0},\n... 
{'baz': 1.0, 'foo': 3.0}]\nTrue\n>>> v.transform({'foo': 4, 'unseen_feature': 3})\narray([[0., 0., 4.]])\n\nSee Also\n--------\nFeatureHasher : Performs vectorization using only a hash function.\nsklearn.preprocessing.OrdinalEncoder : Handles nominal/categorical\n features encoded as columns of arbitrary data types." - } - ], - "functions": [ - { - "name": "_tosequence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Turn X into a sequence or ndarray, avoiding a copy if possible." - } - ] - }, - { - "name": "sklearn.feature_extraction._hash", - "imports": [ - "import numbers", - "import numpy as np", - "import scipy.sparse as sp", - "from utils import IS_PYPY", - "from utils.validation import _deprecate_positional_args", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from _hashing_fast import transform as _hashing_transform" - ], - "classes": [ - { - "name": "FeatureHasher", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "2**20", - "limitation": null, - "ignored": false, - "docstring": "The number of features (columns) in the output matrices. Small numbers of features are likely to cause hash collisions, but large numbers will cause larger coefficient dimensions in linear learners." - }, - { - "name": "input_type", - "type": "Literal[\"dict\", \"pair\", \"string\"]", - "hasDefault": true, - "default": "\"dict\"", - "limitation": null, - "ignored": false, - "docstring": "Either \"dict\" (the default) to accept dictionaries over (feature_name, value); \"pair\" to accept pairs of (feature_name, value); or \"string\" to accept single strings. feature_name should be a string, while value should be a number. In the case of \"string\", a value of 1 is implied. The feature_name is hashed to find the appropriate column for the feature. 
The value's sign might be flipped in the output (but see non_negative, below)." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "The type of feature values. Passed to scipy.sparse matrix constructors as the dtype argument. Do not set this to bool, np.boolean or any unsigned integer type." - }, - { - "name": "alternate_sign", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "When True, an alternating sign is added to the features as to approximately conserve the inner product in the hashed space even for small n_features. This approach is similar to sparse random projection." - }, - { - "name": ".. versionchanged:: 0.19", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "``alternate_sign`` replaces the now deprecated ``non_negative`` parameter." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "No-op.\n\nThis method doesn't do anything. It exists purely for compatibility\nwith the scikit-learn transformer API.\n\nParameters\n----------\nX : ndarray\n\nReturns\n-------\nself : FeatureHasher" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "raw_X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples. 
Each sample must be iterable an (e.g., a list or tuple) containing/generating feature names (and optionally values, see the input_type constructor argument) which will be hashed. raw_X need not support the len function, so it can be the result of a generator; n_samples is determined on the fly." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform a sequence of instances to a scipy.sparse matrix.\n\nParameters\n----------\nraw_X : iterable over iterable over raw features, length = n_samples\n Samples. Each sample must be iterable an (e.g., a list or tuple)\n containing/generating feature names (and optionally values, see\n the input_type constructor argument) which will be hashed.\n raw_X need not support the len function, so it can be the result\n of a generator; n_samples is determined on the fly.\n\nReturns\n-------\nX : sparse matrix of shape (n_samples, n_features)\n Feature matrix, for use with estimators or further transformers." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Implements feature hashing, aka the hashing trick.\n\nThis class turns sequences of symbolic feature names (strings) into\nscipy.sparse matrices, using a hash function to compute the matrix column\ncorresponding to a name. The hash function employed is the signed 32-bit\nversion of Murmurhash3.\n\nFeature names of type byte string are used as-is. Unicode strings are\nconverted to UTF-8 first, but no Unicode normalization is done.\nFeature values must be (finite) numbers.\n\nThis class is a low-memory alternative to DictVectorizer and\nCountVectorizer, intended for large-scale (online) learning and situations\nwhere memory is tight, e.g. when running prediction code on embedded\ndevices.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.13\n\nParameters\n----------\nn_features : int, default=2**20\n The number of features (columns) in the output matrices. Small numbers\n of features are likely to cause hash collisions, but large numbers\n will cause larger coefficient dimensions in linear learners.\ninput_type : {\"dict\", \"pair\", \"string\"}, default=\"dict\"\n Either \"dict\" (the default) to accept dictionaries over\n (feature_name, value); \"pair\" to accept pairs of (feature_name, value);\n or \"string\" to accept single strings.\n feature_name should be a string, while value should be a number.\n In the case of \"string\", a value of 1 is implied.\n The feature_name is hashed to find the appropriate column for the\n feature. The value's sign might be flipped in the output (but see\n non_negative, below).\ndtype : numpy dtype, default=np.float64\n The type of feature values. Passed to scipy.sparse matrix constructors\n as the dtype argument. Do not set this to bool, np.boolean or any\n unsigned integer type.\nalternate_sign : bool, default=True\n When True, an alternating sign is added to the features as to\n approximately conserve the inner product in the hashed space even for\n small n_features. This approach is similar to sparse random projection.\n\n.. versionchanged:: 0.19\n ``alternate_sign`` replaces the now deprecated ``non_negative``\n parameter.\n\nExamples\n--------\n>>> from sklearn.feature_extraction import FeatureHasher\n>>> h = FeatureHasher(n_features=10)\n>>> D = [{'dog': 1, 'cat':2, 'elephant':4},{'dog': 2, 'run': 5}]\n>>> f = h.transform(D)\n>>> f.toarray()\narray([[ 0., 0., -4., -1., 0., 0., 0., 0., 0., 2.],\n [ 0., 0., 0., -2., -5., 0., 0., 0., 0., 0.]])\n\nSee Also\n--------\nDictVectorizer : Vectorizes string-valued features using a hash table.\nsklearn.preprocessing.OneHotEncoder : Handles nominal/categorical features." 
- } - ], - "functions": [ - { - "name": "_hashing_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iteritems", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Like d.iteritems, but accepts any collections.Mapping." - } - ] - }, - { - "name": "sklearn.feature_extraction._stop_words", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.feature_extraction", - "imports": [ - "from _dict_vectorizer import DictVectorizer", - "from _hash import FeatureHasher", - "from image import img_to_graph", - "from image import grid_to_graph", - "from None import text" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.feature_extraction.tests.test_dict_vectorizer", - "imports": [ - "from random import Random", - "import numpy as np", - "import scipy.sparse as sp", - "from numpy.testing import assert_array_equal", - "import pytest", - "from sklearn.feature_extraction import DictVectorizer", - "from sklearn.feature_selection import SelectKBest", - "from sklearn.feature_selection import chi2" - ], - "classes": [], - "functions": [ - { - "name": "test_dictvectorizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_selection", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_of_k", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterable_value", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterable_not_string_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_mapping_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unseen_or_no_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_deterministic_vocabulary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_extraction.tests.test_feature_hasher", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_array_equal", - "import pytest", - "from sklearn.feature_extraction import FeatureHasher", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import fails_if_pypy", - "from sklearn.feature_extraction._hashing_fast import transform as _hashing_transform" - ], - "classes": [], - "functions": [ - { - "name": "test_feature_hasher_dicts", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_hasher_strings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hashing_transform_seed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_hasher_pairs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_hasher_pairs_with_string_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hash_empty_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, 
- "returnType": "Any", - "docstring": null - }, - { - "name": "test_hasher_invalid_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hasher_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hasher_zeros", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hasher_alternate_sign", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hash_collisions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_extraction.tests.test_image", - "imports": [ - "import numpy as np", - "import scipy as sp", - "from scipy import ndimage", - "from scipy.sparse.csgraph import connected_components", - "import pytest", - "from sklearn.feature_extraction.image import img_to_graph", - "from sklearn.feature_extraction.image import grid_to_graph", - "from sklearn.feature_extraction.image import extract_patches_2d", - "from sklearn.feature_extraction.image import reconstruct_from_patches_2d", - "from sklearn.feature_extraction.image import PatchExtractor", - "from sklearn.feature_extraction.image import _extract_patches", - "from sklearn.utils._testing import ignore_warnings", - "from scipy import misc" - ], - "classes": [], - "functions": [ - { - "name": "test_img_to_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_to_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_connect_regions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_connect_regions_with_grid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_downsampled_face", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_orange_face", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_images", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_patches_all", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_patches_all_color", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_patches_all_rect", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_patches_max_patches", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_patch_same_size_image", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_patches_less_than_max_patches", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_reconstruct_patches_perfect", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_reconstruct_patches_perfect_color", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_patch_extractor_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_patch_extractor_max_patches", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_patch_extractor_max_patches_default", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_patch_extractor_all_patches", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_patch_extractor_color", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_patches_strided", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_patches_square", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_width_patch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_extraction.tests.test_text", - "imports": [ - "from collections.abc import Mapping", - "import re", - "import pytest", - "from scipy import sparse", - "from sklearn.feature_extraction.text import strip_tags", - "from sklearn.feature_extraction.text import strip_accents_unicode", - "from sklearn.feature_extraction.text import strip_accents_ascii", - "from sklearn.feature_extraction.text import HashingVectorizer", - "from sklearn.feature_extraction.text import CountVectorizer", - "from sklearn.feature_extraction.text import TfidfTransformer", - "from sklearn.feature_extraction.text import TfidfVectorizer", - "from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS", - "from 
sklearn.model_selection import train_test_split", - "from sklearn.model_selection import cross_val_score", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.pipeline import Pipeline", - "from sklearn.svm import LinearSVC", - "from sklearn.base import clone", - "import numpy as np", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal", - "from sklearn.utils import IS_PYPY", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import fails_if_pypy", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import skip_if_32bit", - "from collections import defaultdict", - "from functools import partial", - "import pickle", - "from io import StringIO" - ], - "classes": [], - "functions": [ - { - "name": "uppercase", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "strip_eacute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split_tokenize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "lazy_analyze", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_strip_accents", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_to_ascii", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_word_analyzer_unigrams", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_word_analyzer_unigrams_and_bigrams", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unicode_decode_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_char_ngram_analyzer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_char_wb_ngram_analyzer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_word_ngram_analyzer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_custom_vocabulary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_custom_vocabulary_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_custom_vocabulary_repeated_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_custom_vocabulary_gap_index", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_stop_words", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_empty_vocabulary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_countvectorizer_twice", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_custom_token_pattern", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check `get_feature_names()` when a custom token pattern is passed.\nNon-regression test for:\nhttps://github.com/scikit-learn/scikit-learn/issues/12971" - }, - { - "name": "test_countvectorizer_custom_token_pattern_with_several_group", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that we raise an error if token pattern capture several groups.\nNon-regression test for:\nhttps://github.com/scikit-learn/scikit-learn/issues/12971" - }, - { - "name": "test_tf_idf_smoothing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidf_no_smoothing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sublinear_tf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidf_vectorizer_setters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hashing_vectorizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_max_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "test_count_vectorizer_max_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_max_df", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_min_df", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_count_binary_occurrences", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hashed_binary_occurrences", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_count_vectorizer_pipeline_grid_selection", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_pipeline_grid_selection", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_pipeline_cross_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_unicode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidf_vectorizer_with_fixed_vocabulary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickling_vectorizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_pickling_built_processors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Tokenizers cannot be pickled\nhttps://github.com/scikit-learn/scikit-learn/issues/12833" - }, - { - "name": "test_countvectorizer_vocab_sets_when_pickling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_vocab_dicts_when_pickling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stop_words_removal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickling_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transformer_idf_setter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidf_vectorizer_setter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidfvectorizer_invalid_idf_attr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_unique_vocab", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hashingvectorizer_nan_in_docs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidfvectorizer_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidfvectorizer_export_idf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "test_vectorizer_vocab_clone", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_string_object_as_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidf_transformer_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidf_transformer_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidf_vectorizer_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizers_invalid_ngram_range", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_stop_words_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_stop_words_inconsistent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_sort_features_64bit_sparse_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that CountVectorizer._sort_features preserves the dtype of its sparse\nfeature matrix.\n\nThis test is skipped on 32bit platforms, see:\n https://github.com/scikit-learn/scikit-learn/pull/11295\nfor more details." 
- }, - { - "name": "test_stop_word_validation_custom_preprocessor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_analyzer_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_analyzer_change_behavior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_analyzer_reraise_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unused_parameters_warn", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tie_breaking_sample_order_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_extraction.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.feature_selection._base", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "from warnings import warn", - "from operator import attrgetter", - "import numpy as np", - "from scipy.sparse import issparse", - "from scipy.sparse import csc_matrix", - "from base import TransformerMixin", - "from utils import check_array", - "from utils import safe_mask", - "from utils import safe_sqr", - "from utils._tags import _safe_tags" - ], - "classes": [ - { - "name": "SelectorMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - 
}, - { - "name": "get_support", - "decorators": [], - "parameters": [ - { - "name": "indices", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the return value will be an array of integers, rather than a boolean mask." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get a mask, or integer index, of the features selected\n\nParameters\n----------\nindices : bool, default=False\n If True, the return value will be an array of integers, rather\n than a boolean mask.\n\nReturns\n-------\nsupport : array\n An index that selects the retained features from a feature vector.\n If `indices` is False, this is a boolean array of shape\n [# input features], in which an element is True iff its\n corresponding feature is selected for retention. If `indices` is\n True, this is an integer array of shape [# output features] whose\n values are indices into the input feature vector." - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Get the boolean mask indicating which features are selected\n\nReturns\n-------\nsupport : boolean array of shape [# input features]\n An element is True iff its corresponding feature is selected for\n retention." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reduce X to the selected features.\n\nParameters\n----------\nX : array of shape [n_samples, n_features]\n The input samples.\n\nReturns\n-------\nX_r : array of shape [n_samples, n_selected_features]\n The input samples with only the selected features." 
- }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reverse the transformation operation\n\nParameters\n----------\nX : array of shape [n_samples, n_selected_features]\n The input samples.\n\nReturns\n-------\nX_r : array of shape [n_samples, n_original_features]\n `X` with columns of zeros inserted where features would have\n been removed by :meth:`transform`." - } - ], - "docstring": "Transformer mixin that performs feature selection given a support mask\n\nThis mixin provides a feature selector implementation with `transform` and\n`inverse_transform` functionality given an implementation of\n`_get_support_mask`." - } - ], - "functions": [ - { - "name": "_get_feature_importances", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A scikit-learn estimator from which we want to get the feature importances." - }, - { - "name": "getter", - "type": "Union[Callable, str, Literal[\"auto\"]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An attribute or a callable to get the feature importance. If `\"auto\"`, `estimator` is expected to expose `coef_` or `feature_importances`." - }, - { - "name": "transform_func", - "type": "Literal[\"norm\", \"square\"]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The transform to apply to the feature importances. By default (`None`) no transformation is applied." 
- }, - { - "name": "norm_order", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The norm order to apply when `transform_func=\"norm\"`. Only applied when `importances.ndim > 1`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Retrieve and aggregate (ndim > 1) the feature importances\nfrom an estimator. Also optionally applies transformation.\n\nParameters\n----------\nestimator : estimator\n A scikit-learn estimator from which we want to get the feature\n importances.\n\ngetter : \"auto\", str or callable\n An attribute or a callable to get the feature importance. If `\"auto\"`,\n `estimator` is expected to expose `coef_` or `feature_importances`.\n\ntransform_func : {\"norm\", \"square\"}, default=None\n The transform to apply to the feature importances. By default (`None`)\n no transformation is applied.\n\nnorm_order : int, default=1\n The norm order to apply when `transform_func=\"norm\"`. Only applied\n when `importances.ndim > 1`.\n\nReturns\n-------\nimportances : ndarray of shape (n_features,)\n The features importances, optionally transformed." 
- } - ] - }, - { - "name": "sklearn.feature_selection._from_model", - "imports": [ - "import numpy as np", - "import numbers", - "from _base import SelectorMixin", - "from _base import _get_feature_importances", - "from base import BaseEstimator", - "from base import clone", - "from base import MetaEstimatorMixin", - "from utils._tags import _safe_tags", - "from utils.validation import check_is_fitted", - "from exceptions import NotFittedError", - "from utils.metaestimators import if_delegate_has_method", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "SelectFromModel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base estimator from which the transformer is built. This can be both a fitted (if ``prefit`` is set to True) or a non-fitted estimator. The estimator must have either a ``feature_importances_`` or ``coef_`` attribute after fitting." - }, - { - "name": "threshold", - "type": "Union[float, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The threshold value to use for feature selection. Features whose importance is greater or equal are kept while the others are discarded. If \"median\" (resp. \"mean\"), then the ``threshold`` value is the median (resp. the mean) of the feature importances. A scaling factor (e.g., \"1.25*mean\") may also be used. If None and if the estimator has a parameter penalty set to l1, either explicitly or implicitly (e.g, Lasso), the threshold used is 1e-5. Otherwise, \"mean\" is used by default." 
- }, - { - "name": "prefit", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether a prefit model is expected to be passed into the constructor directly or not. If True, ``transform`` must be called directly and SelectFromModel cannot be used with ``cross_val_score``, ``GridSearchCV`` and similar utilities that clone the estimator. Otherwise train the model using ``fit`` and then ``transform`` to do feature selection." - }, - { - "name": "norm_order", - "type": null, - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Order of the norm used to filter the vectors of coefficients below ``threshold`` in the case where the ``coef_`` attribute of the estimator is of dimension 2." - }, - { - "name": "max_features", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of features to select. To only select based on ``max_features``, set ``threshold=-np.inf``. .. versionadded:: 0.20" - }, - { - "name": "importance_getter", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If 'auto', uses the feature importance either through a ``coef_`` attribute or ``feature_importances_`` attribute of estimator. Also accepts a string that specifies an attribute name/path for extracting feature importance (implemented with `attrgetter`). For example, give `regressor_.coef_` in case of :class:`~sklearn.compose.TransformedTargetRegressor` or `named_steps.clf.feature_importances_` in case of :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`. If `callable`, overrides the default feature importance getter. The callable is passed with the fitted estimator and it should return importance for each feature. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (integers that correspond to classes in classification, real numbers in regression)." - }, - { - "name": "**fit_params", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the SelectFromModel meta-transformer.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,), default=None\n The target values (integers that correspond to classes in\n classification, real numbers in regression).\n\n**fit_params : Other estimator specific parameters\n\nReturns\n-------\nself : object" - }, - { - "name": "threshold_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (integers that correspond to classes in classification, real numbers in regression)." - }, - { - "name": "**fit_params", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the SelectFromModel meta-transformer only once.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,), default=None\n The target values (integers that correspond to classes in\n classification, real numbers in regression).\n\n**fit_params : Other estimator specific parameters\n\nReturns\n-------\nself : object" - }, - { - "name": "n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Meta-transformer for selecting features based on importance weights.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : object\n The base estimator from which the transformer is built.\n This can be both a fitted (if ``prefit`` is set to True)\n or a non-fitted estimator. The estimator must have either a\n ``feature_importances_`` or ``coef_`` attribute after fitting.\n\nthreshold : string or float, default=None\n The threshold value to use for feature selection. Features whose\n importance is greater or equal are kept while the others are\n discarded. If \"median\" (resp. \"mean\"), then the ``threshold`` value is\n the median (resp. the mean) of the feature importances. A scaling\n factor (e.g., \"1.25*mean\") may also be used. 
If None and if the\n estimator has a parameter penalty set to l1, either explicitly\n or implicitly (e.g, Lasso), the threshold used is 1e-5.\n Otherwise, \"mean\" is used by default.\n\nprefit : bool, default=False\n Whether a prefit model is expected to be passed into the constructor\n directly or not. If True, ``transform`` must be called directly\n and SelectFromModel cannot be used with ``cross_val_score``,\n ``GridSearchCV`` and similar utilities that clone the estimator.\n Otherwise train the model using ``fit`` and then ``transform`` to do\n feature selection.\n\nnorm_order : non-zero int, inf, -inf, default=1\n Order of the norm used to filter the vectors of coefficients below\n ``threshold`` in the case where the ``coef_`` attribute of the\n estimator is of dimension 2.\n\nmax_features : int, default=None\n The maximum number of features to select.\n To only select based on ``max_features``, set ``threshold=-np.inf``.\n\n .. versionadded:: 0.20\n\nimportance_getter : str or callable, default='auto'\n If 'auto', uses the feature importance either through a ``coef_``\n attribute or ``feature_importances_`` attribute of estimator.\n\n Also accepts a string that specifies an attribute name/path\n for extracting feature importance (implemented with `attrgetter`).\n For example, give `regressor_.coef_` in case of\n :class:`~sklearn.compose.TransformedTargetRegressor` or\n `named_steps.clf.feature_importances_` in case of\n :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n If `callable`, overrides the default feature importance getter.\n The callable is passed with the fitted estimator and it should\n return importance for each feature.\n\n .. 
versionadded:: 0.24\n\nAttributes\n----------\nestimator_ : an estimator\n The base estimator from which the transformer is built.\n This is stored only when a non-fitted estimator is passed to the\n ``SelectFromModel``, i.e when prefit is False.\n\nthreshold_ : float\n The threshold value used for feature selection.\n\nNotes\n-----\nAllows NaN/Inf in the input if the underlying estimator does as well.\n\nExamples\n--------\n>>> from sklearn.feature_selection import SelectFromModel\n>>> from sklearn.linear_model import LogisticRegression\n>>> X = [[ 0.87, -1.34, 0.31 ],\n... [-2.79, -0.02, -0.85 ],\n... [-1.34, -0.48, -2.55 ],\n... [ 1.92, 1.48, 0.65 ]]\n>>> y = [0, 1, 0, 1]\n>>> selector = SelectFromModel(estimator=LogisticRegression()).fit(X, y)\n>>> selector.estimator_.coef_\narray([[-0.3252302 , 0.83462377, 0.49750423]])\n>>> selector.threshold_\n0.55245...\n>>> selector.get_support()\narray([False, True, False])\n>>> selector.transform(X)\narray([[-1.34],\n [-0.02],\n [-0.48],\n [ 1.48]])\n\nSee Also\n--------\nRFE : Recursive feature elimination based on importance weights.\nRFECV : Recursive feature elimination with built-in cross-validated\n selection of the best number of features.\nSequentialFeatureSelector : Sequential cross-validation based feature\n selection. Does not rely on importance weights." 
- } - ], - "functions": [ - { - "name": "_calculate_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Interpret the threshold value" - } - ] - }, - { - "name": "sklearn.feature_selection._mutual_info", - "imports": [ - "import numpy as np", - "from scipy.sparse import issparse", - "from scipy.special import digamma", - "from metrics.cluster import mutual_info_score", - "from neighbors import NearestNeighbors", - "from neighbors import KDTree", - "from preprocessing import scale", - "from utils import check_random_state", - "from utils.fixes import _astype_copy_false", - "from utils.validation import check_array", - "from utils.validation import check_X_y", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import check_classification_targets" - ], - "classes": [], - "functions": [ - { - "name": "_compute_mi_cc", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples of two continuous random variables, must have an identical shape." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples of two continuous random variables, must have an identical shape." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of nearest neighbors to search for each point, see [1]_." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute mutual information between two continuous variables.\n\nParameters\n----------\nx, y : ndarray, shape (n_samples,)\n Samples of two continuous random variables, must have an identical\n shape.\n\nn_neighbors : int\n Number of nearest neighbors to search for each point, see [1]_.\n\nReturns\n-------\nmi : float\n Estimated mutual information. If it turned out to be negative it is\n replace by 0.\n\nNotes\n-----\nTrue mutual information can't be negative. If its estimate by a numerical\nmethod is negative, it means (providing the method is adequate) that the\nmutual information is close to 0 and replacing it by 0 is a reasonable\nstrategy.\n\nReferences\n----------\n.. [1] A. Kraskov, H. Stogbauer and P. Grassberger, \"Estimating mutual\n information\". Phys. Rev. E 69, 2004." - }, - { - "name": "_compute_mi_cd", - "decorators": [], - "parameters": [ - { - "name": "c", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples of a continuous random variable." - }, - { - "name": "d", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples of a discrete random variable." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of nearest neighbors to search for each point, see [1]_." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute mutual information between continuous and discrete variables.\n\nParameters\n----------\nc : ndarray, shape (n_samples,)\n Samples of a continuous random variable.\n\nd : ndarray, shape (n_samples,)\n Samples of a discrete random variable.\n\nn_neighbors : int\n Number of nearest neighbors to search for each point, see [1]_.\n\nReturns\n-------\nmi : float\n Estimated mutual information. If it turned out to be negative it is\n replace by 0.\n\nNotes\n-----\nTrue mutual information can't be negative. If its estimate by a numerical\nmethod is negative, it means (providing the method is adequate) that the\nmutual information is close to 0 and replacing it by 0 is a reasonable\nstrategy.\n\nReferences\n----------\n.. [1] B. C. Ross \"Mutual Information between Discrete and Continuous\n Data Sets\". PLoS ONE 9(2), 2014." - }, - { - "name": "_compute_mi", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute mutual information between two variables.\n\nThis is a simple wrapper which selects a proper function to call based on\nwhether `x` and `y` are discrete or not." - }, - { - "name": "_iterate_columns", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix over which to iterate." - }, - { - "name": "columns", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of columns to iterate over. If None, iterate over all columns." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Iterate over columns of a matrix.\n\nParameters\n----------\nX : ndarray or csc_matrix, shape (n_samples, n_features)\n Matrix over which to iterate.\n\ncolumns : iterable or None, default=None\n Indices of columns to iterate over. 
If None, iterate over all columns.\n\nYields\n------\nx : ndarray, shape (n_samples,)\n Columns of `X` in dense format." - }, - { - "name": "_estimate_mi", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Feature matrix." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector." - }, - { - "name": "discrete_features", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If bool, then determines whether to consider all features discrete or continuous. If array, then it should be either a boolean mask with shape (n_features,) or array with indices of discrete features. If 'auto', it is assigned to False for dense `X` and to True for sparse `X`." - }, - { - "name": "discrete_target", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to consider `y` as a discrete variable." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to use for MI estimation for continuous variables, see [1]_ and [2]_. Higher values reduce variance of the estimation, but could introduce a bias." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to make a copy of the given data. If set to False, the initial data will be overwritten." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for adding small noise to continuous variables in order to remove repeated values. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate mutual information between the features and the target.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Feature matrix.\n\ny : array-like of shape (n_samples,)\n Target vector.\n\ndiscrete_features : {'auto', bool, array-like}, default='auto'\n If bool, then determines whether to consider all features discrete\n or continuous. If array, then it should be either a boolean mask\n with shape (n_features,) or array with indices of discrete features.\n If 'auto', it is assigned to False for dense `X` and to True for\n sparse `X`.\n\ndiscrete_target : bool, default=False\n Whether to consider `y` as a discrete variable.\n\nn_neighbors : int, default=3\n Number of neighbors to use for MI estimation for continuous variables,\n see [1]_ and [2]_. Higher values reduce variance of the estimation, but\n could introduce a bias.\n\ncopy : bool, default=True\n Whether to make a copy of the given data. If set to False, the initial\n data will be overwritten.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for adding small noise to\n continuous variables in order to remove repeated values.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nmi : ndarray, shape (n_features,)\n Estimated mutual information between each feature and the target.\n A negative value will be replaced by 0.\n\nReferences\n----------\n.. [1] A. Kraskov, H. Stogbauer and P. 
Grassberger, \"Estimating mutual\n information\". Phys. Rev. E 69, 2004.\n.. [2] B. C. Ross \"Mutual Information between Discrete and Continuous\n Data Sets\". PLoS ONE 9(2), 2014." - }, - { - "name": "mutual_info_regression", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Feature matrix." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector." - }, - { - "name": "discrete_features", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If bool, then determines whether to consider all features discrete or continuous. If array, then it should be either a boolean mask with shape (n_features,) or array with indices of discrete features. If 'auto', it is assigned to False for dense `X` and to True for sparse `X`." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to use for MI estimation for continuous variables, see [2]_ and [3]_. Higher values reduce variance of the estimation, but could introduce a bias." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to make a copy of the given data. If set to False, the initial data will be overwritten." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for adding small noise to continuous variables in order to remove repeated values. Pass an int for reproducible results across multiple function calls. 
See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate mutual information for a continuous target variable.\n\nMutual information (MI) [1]_ between two random variables is a non-negative\nvalue, which measures the dependency between the variables. It is equal\nto zero if and only if two random variables are independent, and higher\nvalues mean higher dependency.\n\nThe function relies on nonparametric methods based on entropy estimation\nfrom k-nearest neighbors distances as described in [2]_ and [3]_. Both\nmethods are based on the idea originally proposed in [4]_.\n\nIt can be used for univariate features selection, read more in the\n:ref:`User Guide `.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Feature matrix.\n\ny : array-like of shape (n_samples,)\n Target vector.\n\ndiscrete_features : {'auto', bool, array-like}, default='auto'\n If bool, then determines whether to consider all features discrete\n or continuous. If array, then it should be either a boolean mask\n with shape (n_features,) or array with indices of discrete features.\n If 'auto', it is assigned to False for dense `X` and to True for\n sparse `X`.\n\nn_neighbors : int, default=3\n Number of neighbors to use for MI estimation for continuous variables,\n see [2]_ and [3]_. Higher values reduce variance of the estimation, but\n could introduce a bias.\n\ncopy : bool, default=True\n Whether to make a copy of the given data. If set to False, the initial\n data will be overwritten.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for adding small noise to\n continuous variables in order to remove repeated values.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nmi : ndarray, shape (n_features,)\n Estimated mutual information between each feature and the target.\n\nNotes\n-----\n1. 
The term \"discrete features\" is used instead of naming them\n \"categorical\", because it describes the essence more accurately.\n For example, pixel intensities of an image are discrete features\n (but hardly categorical) and you will get better results if mark them\n as such. Also note, that treating a continuous variable as discrete and\n vice versa will usually give incorrect results, so be attentive about\n that.\n2. True mutual information can't be negative. If its estimate turns out\n to be negative, it is replaced by zero.\n\nReferences\n----------\n.. [1] `Mutual Information\n `_\n on Wikipedia.\n.. [2] A. Kraskov, H. Stogbauer and P. Grassberger, \"Estimating mutual\n information\". Phys. Rev. E 69, 2004.\n.. [3] B. C. Ross \"Mutual Information between Discrete and Continuous\n Data Sets\". PLoS ONE 9(2), 2014.\n.. [4] L. F. Kozachenko, N. N. Leonenko, \"Sample Estimate of the Entropy\n of a Random Vector\", Probl. Peredachi Inf., 23:2 (1987), 9-16" - }, - { - "name": "mutual_info_classif", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Feature matrix." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector." - }, - { - "name": "discrete_features", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If bool, then determines whether to consider all features discrete or continuous. If array, then it should be either a boolean mask with shape (n_features,) or array with indices of discrete features. If 'auto', it is assigned to False for dense `X` and to True for sparse `X`." 
- }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to use for MI estimation for continuous variables, see [2]_ and [3]_. Higher values reduce variance of the estimation, but could introduce a bias." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to make a copy of the given data. If set to False, the initial data will be overwritten." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for adding small noise to continuous variables in order to remove repeated values. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate mutual information for a discrete target variable.\n\nMutual information (MI) [1]_ between two random variables is a non-negative\nvalue, which measures the dependency between the variables. It is equal\nto zero if and only if two random variables are independent, and higher\nvalues mean higher dependency.\n\nThe function relies on nonparametric methods based on entropy estimation\nfrom k-nearest neighbors distances as described in [2]_ and [3]_. Both\nmethods are based on the idea originally proposed in [4]_.\n\nIt can be used for univariate features selection, read more in the\n:ref:`User Guide `.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Feature matrix.\n\ny : array-like of shape (n_samples,)\n Target vector.\n\ndiscrete_features : {'auto', bool, array-like}, default='auto'\n If bool, then determines whether to consider all features discrete\n or continuous. 
If array, then it should be either a boolean mask\n with shape (n_features,) or array with indices of discrete features.\n If 'auto', it is assigned to False for dense `X` and to True for\n sparse `X`.\n\nn_neighbors : int, default=3\n Number of neighbors to use for MI estimation for continuous variables,\n see [2]_ and [3]_. Higher values reduce variance of the estimation, but\n could introduce a bias.\n\ncopy : bool, default=True\n Whether to make a copy of the given data. If set to False, the initial\n data will be overwritten.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for adding small noise to\n continuous variables in order to remove repeated values.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nmi : ndarray, shape (n_features,)\n Estimated mutual information between each feature and the target.\n\nNotes\n-----\n1. The term \"discrete features\" is used instead of naming them\n \"categorical\", because it describes the essence more accurately.\n For example, pixel intensities of an image are discrete features\n (but hardly categorical) and you will get better results if mark them\n as such. Also note, that treating a continuous variable as discrete and\n vice versa will usually give incorrect results, so be attentive about\n that.\n2. True mutual information can't be negative. If its estimate turns out\n to be negative, it is replaced by zero.\n\nReferences\n----------\n.. [1] `Mutual Information\n `_\n on Wikipedia.\n.. [2] A. Kraskov, H. Stogbauer and P. Grassberger, \"Estimating mutual\n information\". Phys. Rev. E 69, 2004.\n.. [3] B. C. Ross \"Mutual Information between Discrete and Continuous\n Data Sets\". PLoS ONE 9(2), 2014.\n.. [4] L. F. Kozachenko, N. N. Leonenko, \"Sample Estimate of the Entropy\n of a Random Vector:, Probl. 
Peredachi Inf., 23:2 (1987), 9-16" - } - ] - }, - { - "name": "sklearn.feature_selection._rfe", - "imports": [ - "import numpy as np", - "import numbers", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "from utils.metaestimators import if_delegate_has_method", - "from utils.metaestimators import _safe_split", - "from utils._tags import _safe_tags", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from base import BaseEstimator", - "from base import MetaEstimatorMixin", - "from base import clone", - "from base import is_classifier", - "from model_selection import check_cv", - "from model_selection._validation import _score", - "from metrics import check_scoring", - "from _base import SelectorMixin", - "from _base import _get_feature_importances" - ], - "classes": [ - { - "name": "RFE", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A supervised learning estimator with a ``fit`` method that provides information about feature importance (e.g. `coef_`, `feature_importances_`)." - }, - { - "name": "n_features_to_select", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features to select. If `None`, half of the features are selected. If integer, the parameter is the absolute number of features to select. If float between 0 and 1, it is the fraction of features to select. .. versionchanged:: 0.24 Added float values for fractions." 
- }, - { - "name": "step", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "If greater than or equal to 1, then ``step`` corresponds to the (integer) number of features to remove at each iteration. If within (0.0, 1.0), then ``step`` corresponds to the percentage (rounded down) of features to remove at each iteration." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls verbosity of output." - }, - { - "name": "importance_getter", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If 'auto', uses the feature importance either through a `coef_` or `feature_importances_` attributes of estimator. Also accepts a string that specifies an attribute name/path for extracting feature importance (implemented with `attrgetter`). For example, give `regressor_.coef_` in case of :class:`~sklearn.compose.TransformedTargetRegressor` or `named_steps.clf.feature_importances_` in case of class:`~sklearn.pipeline.Pipeline` with its last step named `clf`. If `callable`, overrides the default feature importance getter. The callable is passed with the fitted estimator and it should return importance for each feature. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_estimator_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the RFE model and then the underlying estimator on the selected\n features.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,)\n The target values." - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reduce X to the selected features and then predict using the\n underlying estimator.\n\nParameters\n----------\nX : array of shape [n_samples, n_features]\n The input samples.\n\nReturns\n-------\ny : array of shape [n_samples]\n The predicted target values." 
- }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - }, - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reduce X to the selected features and then return the score of the\n underlying estimator.\n\nParameters\n----------\nX : array of shape [n_samples, n_features]\n The input samples.\n\ny : array of shape [n_samples]\n The target values." - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the decision function of ``X``.\n\nParameters\n----------\nX : {array-like or sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nscore : array, shape = [n_samples, n_classes] or [n_samples]\n The decision function of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n Regression and binary classification produce an array of shape\n [n_samples]." 
- }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X.\n\nParameters\n----------\nX : {array-like or sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\np : array of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class log-probabilities for X.\n\nParameters\n----------\nX : array of shape [n_samples, n_features]\n The input samples.\n\nReturns\n-------\np : array of shape (n_samples, n_classes)\n The class log-probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Feature ranking with recursive feature elimination.\n\nGiven an external estimator that assigns weights to features (e.g., the\ncoefficients of a linear model), the goal of recursive feature elimination\n(RFE) is to select features by recursively considering smaller and smaller\nsets of features. First, the estimator is trained on the initial set of\nfeatures and the importance of each feature is obtained either through\nany specific attribute or callable.\nThen, the least important features are pruned from current set of features.\nThat procedure is recursively repeated on the pruned set until the desired\nnumber of features to select is eventually reached.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : ``Estimator`` instance\n A supervised learning estimator with a ``fit`` method that provides\n information about feature importance\n (e.g. `coef_`, `feature_importances_`).\n\nn_features_to_select : int or float, default=None\n The number of features to select. If `None`, half of the features are\n selected. If integer, the parameter is the absolute number of features\n to select. If float between 0 and 1, it is the fraction of features to\n select.\n\n .. 
versionchanged:: 0.24\n Added float values for fractions.\n\nstep : int or float, default=1\n If greater than or equal to 1, then ``step`` corresponds to the\n (integer) number of features to remove at each iteration.\n If within (0.0, 1.0), then ``step`` corresponds to the percentage\n (rounded down) of features to remove at each iteration.\n\nverbose : int, default=0\n Controls verbosity of output.\n\nimportance_getter : str or callable, default='auto'\n If 'auto', uses the feature importance either through a `coef_`\n or `feature_importances_` attributes of estimator.\n\n Also accepts a string that specifies an attribute name/path\n for extracting feature importance (implemented with `attrgetter`).\n For example, give `regressor_.coef_` in case of\n :class:`~sklearn.compose.TransformedTargetRegressor` or\n `named_steps.clf.feature_importances_` in case of\n class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n If `callable`, overrides the default feature importance getter.\n The callable is passed with the fitted estimator and it should\n return importance for each feature.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nestimator_ : ``Estimator`` instance\n The fitted estimator used to select features.\n\nn_features_ : int\n The number of selected features.\n\nranking_ : ndarray of shape (n_features,)\n The feature ranking, such that ``ranking_[i]`` corresponds to the\n ranking position of the i-th feature. 
Selected (i.e., estimated\n best) features are assigned rank 1.\n\nsupport_ : ndarray of shape (n_features,)\n The mask of selected features.\n\nExamples\n--------\nThe following example shows how to retrieve the 5 most informative\nfeatures in the Friedman #1 dataset.\n\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.feature_selection import RFE\n>>> from sklearn.svm import SVR\n>>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)\n>>> estimator = SVR(kernel=\"linear\")\n>>> selector = RFE(estimator, n_features_to_select=5, step=1)\n>>> selector = selector.fit(X, y)\n>>> selector.support_\narray([ True, True, True, True, True, False, False, False, False,\n False])\n>>> selector.ranking_\narray([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])\n\nNotes\n-----\nAllows NaN/Inf in the input if the underlying estimator does as well.\n\nSee Also\n--------\nRFECV : Recursive feature elimination with built-in cross-validated\n selection of the best number of features.\nSelectFromModel : Feature selection based on thresholds of importance\n weights.\nSequentialFeatureSelector : Sequential cross-validation based feature\n selection. Does not rely on importance weights.\n\nReferences\n----------\n\n.. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., \"Gene selection\n for cancer classification using support vector machines\",\n Mach. Learn., 46(1-3), 389--422, 2002." - }, - { - "name": "RFECV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A supervised learning estimator with a ``fit`` method that provides information about feature importance either through a ``coef_`` attribute or through a ``feature_importances_`` attribute." 
- }, - { - "name": "step", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "If greater than or equal to 1, then ``step`` corresponds to the (integer) number of features to remove at each iteration. If within (0.0, 1.0), then ``step`` corresponds to the percentage (rounded down) of features to remove at each iteration. Note that the last iteration may remove fewer than ``step`` features in order to reach ``min_features_to_select``." - }, - { - "name": "min_features_to_select", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of features to be selected. This number of features will always be scored, even if the difference between the original feature count and ``min_features_to_select`` isn't divisible by ``step``. .. versionadded:: 0.20" - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if ``y`` is binary or multiclass, :class:`~sklearn.model_selection.StratifiedKFold` is used. If the estimator is a classifier or if ``y`` is neither binary nor multiclass, :class:`~sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value of None changed from 3-fold to 5-fold." 
- }, - { - "name": "scoring", - "type": "Optional[Union[Callable, str]]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls verbosity of output." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of cores to run in parallel while fitting across folds. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionadded:: 0.18" - }, - { - "name": "importance_getter", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If 'auto', uses the feature importance either through a `coef_` or `feature_importances_` attributes of estimator. Also accepts a string that specifies an attribute name/path for extracting feature importance. For example, give `regressor_.coef_` in case of :class:`~sklearn.compose.TransformedTargetRegressor` or `named_steps.clf.feature_importances_` in case of :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`. If `callable`, overrides the default feature importance getter. The callable is passed with the fitted estimator and it should return importance for each feature. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where `n_samples` is the number of samples and `n_features` is the total number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values (integers for classification, real numbers for regression)." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a \"Group\" :term:`cv` instance (e.g., :class:`~sklearn.model_selection.GroupKFold`). .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the RFE model and automatically tune the number of selected\n features.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where `n_samples` is the number of samples and\n `n_features` is the total number of features.\n\ny : array-like of shape (n_samples,)\n Target values (integers for classification, real numbers for\n regression).\n\ngroups : array-like of shape (n_samples,) or None, default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n .. 
versionadded:: 0.20" - } - ], - "docstring": "Feature ranking with recursive feature elimination and cross-validated\nselection of the best number of features.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : ``Estimator`` instance\n A supervised learning estimator with a ``fit`` method that provides\n information about feature importance either through a ``coef_``\n attribute or through a ``feature_importances_`` attribute.\n\nstep : int or float, default=1\n If greater than or equal to 1, then ``step`` corresponds to the\n (integer) number of features to remove at each iteration.\n If within (0.0, 1.0), then ``step`` corresponds to the percentage\n (rounded down) of features to remove at each iteration.\n Note that the last iteration may remove fewer than ``step`` features in\n order to reach ``min_features_to_select``.\n\nmin_features_to_select : int, default=1\n The minimum number of features to be selected. This number of features\n will always be scored, even if the difference between the original\n feature count and ``min_features_to_select`` isn't divisible by\n ``step``.\n\n .. versionadded:: 0.20\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if ``y`` is binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used. If the\n estimator is a classifier or if ``y`` is neither binary nor multiclass,\n :class:`~sklearn.model_selection.KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. 
versionchanged:: 0.22\n ``cv`` default value of None changed from 3-fold to 5-fold.\n\nscoring : string, callable or None, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\nverbose : int, default=0\n Controls verbosity of output.\n\nn_jobs : int or None, default=None\n Number of cores to run in parallel while fitting across folds.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.18\n\nimportance_getter : str or callable, default='auto'\n If 'auto', uses the feature importance either through a `coef_`\n or `feature_importances_` attributes of estimator.\n\n Also accepts a string that specifies an attribute name/path\n for extracting feature importance.\n For example, give `regressor_.coef_` in case of\n :class:`~sklearn.compose.TransformedTargetRegressor` or\n `named_steps.clf.feature_importances_` in case of\n :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n If `callable`, overrides the default feature importance getter.\n The callable is passed with the fitted estimator and it should\n return importance for each feature.\n\n .. 
versionadded:: 0.24\n\nAttributes\n----------\nestimator_ : ``Estimator`` instance\n The fitted estimator used to select features.\n\ngrid_scores_ : ndarray of shape (n_subsets_of_features,)\n The cross-validation scores such that\n ``grid_scores_[i]`` corresponds to\n the CV score of the i-th subset of features.\n\nn_features_ : int\n The number of selected features with cross-validation.\n\nranking_ : narray of shape (n_features,)\n The feature ranking, such that `ranking_[i]`\n corresponds to the ranking\n position of the i-th feature.\n Selected (i.e., estimated best)\n features are assigned rank 1.\n\nsupport_ : ndarray of shape (n_features,)\n The mask of selected features.\n\nNotes\n-----\nThe size of ``grid_scores_`` is equal to\n``ceil((n_features - min_features_to_select) / step) + 1``,\nwhere step is the number of features removed at each iteration.\n\nAllows NaN/Inf in the input if the underlying estimator does as well.\n\nExamples\n--------\nThe following example shows how to retrieve the a-priori not known 5\ninformative features in the Friedman #1 dataset.\n\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.feature_selection import RFECV\n>>> from sklearn.svm import SVR\n>>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)\n>>> estimator = SVR(kernel=\"linear\")\n>>> selector = RFECV(estimator, step=1, cv=5)\n>>> selector = selector.fit(X, y)\n>>> selector.support_\narray([ True, True, True, True, True, False, False, False, False,\n False])\n>>> selector.ranking_\narray([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])\n\nSee Also\n--------\nRFE : Recursive feature elimination.\n\nReferences\n----------\n\n.. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., \"Gene selection\n for cancer classification using support vector machines\",\n Mach. Learn., 46(1-3), 389--422, 2002." 
- } - ], - "functions": [ - { - "name": "_rfe_single_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the score for a fit across one fold." - } - ] - }, - { - "name": "sklearn.feature_selection._sequential", - "imports": [ - "import numbers", - "import numpy as np", - "from _base import SelectorMixin", - "from base import BaseEstimator", - "from base import MetaEstimatorMixin", - "from base import clone", - "from utils._tags import _safe_tags", - "from utils.validation import check_is_fitted", - "from model_selection import cross_val_score" - ], - "classes": [ - { - "name": "SequentialFeatureSelector", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An unfitted estimator." - }, - { - "name": "n_features_to_select", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features to select. If `None`, half of the features are selected. If integer, the parameter is the absolute number of features to select. If float between 0 and 1, it is the fraction of features to select." - }, - { - "name": "direction: {'forward'", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to perform forward selection or backward selection." - }, - { - "name": "'backward'}", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to perform forward selection or backward selection." 
- }, - { - "name": "default='forward'", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to perform forward selection or backward selection." - }, - { - "name": "scoring", - "type": "Union[Callable, str, Dict]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A single str (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. NOTE that when using custom scorers, each scorer should return a single value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. If None, the estimator's score method is used." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. When evaluating a new feature to add or remove, the cross-validation procedure is parallel over the folds. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn the features to select.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors.\ny : array-like of shape (n_samples,)\n Target values.\n\nReturns\n-------\nself : object" - }, - { - "name": "_get_best_new_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transformer that performs Sequential Feature Selection.\n\nThis Sequential Feature Selector adds (forward selection) or\nremoves (backward selection) features to form a feature subset in a\ngreedy fashion. At each stage, this estimator chooses the best feature to\nadd or remove based on the cross-validation score of an estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nestimator : estimator instance\n An unfitted estimator.\n\nn_features_to_select : int or float, default=None\n The number of features to select. If `None`, half of the features are\n selected. If integer, the parameter is the absolute number of features\n to select. 
If float between 0 and 1, it is the fraction of features to\n select.\n\ndirection: {'forward', 'backward'}, default='forward'\n Whether to perform forward selection or backward selection.\n\nscoring : str, callable, list/tuple or dict, default=None\n A single str (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n NOTE that when using custom scorers, each scorer should return a single\n value. Metric functions returning a list/array of values can be wrapped\n into multiple scorers that return one value each.\n\n If None, the estimator's score method is used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. When evaluating a new feature to\n add or remove, the cross-validation procedure is parallel over the\n folds.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nn_features_to_select_ : int\n The number of features that were selected.\n\nsupport_ : ndarray of shape (n_features,), dtype=bool\n The mask of selected features.\n\nSee Also\n--------\nRFE : Recursive feature elimination based on importance weights.\nRFECV : Recursive feature elimination based on importance weights, with\n automatic selection of the number of features.\nSelectFromModel : Feature selection based on thresholds of importance\n weights.\n\nExamples\n--------\n>>> from sklearn.feature_selection import SequentialFeatureSelector\n>>> from sklearn.neighbors import KNeighborsClassifier\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> knn = KNeighborsClassifier(n_neighbors=3)\n>>> sfs = SequentialFeatureSelector(knn, n_features_to_select=3)\n>>> sfs.fit(X, y)\nSequentialFeatureSelector(estimator=KNeighborsClassifier(n_neighbors=3),\n n_features_to_select=3)\n>>> sfs.get_support()\narray([ True, False, True, True])\n>>> sfs.transform(X).shape\n(150, 3)" - } - ], - "functions": [] - }, - { - "name": "sklearn.feature_selection._univariate_selection", - "imports": [ - "import numpy as np", - "import warnings", - "from scipy import special", - "from scipy import stats", - "from scipy.sparse import issparse", - "from base import BaseEstimator", - "from preprocessing import LabelBinarizer", - "from utils import as_float_array", - "from utils import check_array", - "from utils import check_X_y", - "from utils import safe_sqr", - "from utils import safe_mask", - "from utils.extmath import safe_sparse_dot", - "from utils.extmath import row_norms", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from _base import SelectorMixin" - ], - "classes": [ - { - "name": "_BaseFilter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - 
{ - "name": "score_func", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues) or a single array with scores." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels in classification, real numbers in regression)." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Run score function on (X, y) and get the appropriate features.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\nReturns\n-------\nself : object" - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Initialize the univariate feature selection.\n\nParameters\n----------\nscore_func : callable\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues) or a single array with scores." 
- }, - { - "name": "SelectPercentile", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "score_func", - "type": "Callable", - "hasDefault": true, - "default": "f_classif", - "limitation": null, - "ignored": false, - "docstring": "Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues) or a single array with scores. Default is f_classif (see below \"See Also\"). The default function only works with classification tasks. .. versionadded:: 0.18" - }, - { - "name": "percentile", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Percent of features to keep." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Select features according to a percentile of the highest scores.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues) or a single array with scores.\n Default is f_classif (see below \"See Also\"). The default function only\n works with classification tasks.\n\n .. 
versionadded:: 0.18\n\npercentile : int, default=10\n Percent of features to keep.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores, None if `score_func` returned only scores.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.feature_selection import SelectPercentile, chi2\n>>> X, y = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> X_new = SelectPercentile(chi2, percentile=10).fit_transform(X, y)\n>>> X_new.shape\n(1797, 7)\n\nNotes\n-----\nTies between features with equal scores will be broken in an unspecified\nway.\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nmutual_info_classif : Mutual information for a discrete target.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a continuous target.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n mode." - }, - { - "name": "SelectKBest", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "score_func", - "type": "Callable", - "hasDefault": true, - "default": "f_classif", - "limitation": null, - "ignored": false, - "docstring": "Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues) or a single array with scores. Default is f_classif (see below \"See Also\"). The default function only works with classification tasks. .. 
versionadded:: 0.18" - }, - { - "name": "k", - "type": "Union[Literal[\"all\"], int]", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of top features to select. The \"all\" option bypasses selection, for use in a parameter search." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Select features according to the k highest scores.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues) or a single array with scores.\n Default is f_classif (see below \"See Also\"). The default function only\n works with classification tasks.\n\n .. 
versionadded:: 0.18\n\nk : int or \"all\", default=10\n Number of top features to select.\n The \"all\" option bypasses selection, for use in a parameter search.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores, None if `score_func` returned only scores.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.feature_selection import SelectKBest, chi2\n>>> X, y = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> X_new = SelectKBest(chi2, k=20).fit_transform(X, y)\n>>> X_new.shape\n(1797, 20)\n\nNotes\n-----\nTies between features with equal scores will be broken in an unspecified\nway.\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nmutual_info_classif : Mutual information for a discrete target.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a continuous target.\nSelectPercentile : Select features based on percentile of the highest\n scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n mode." - }, - { - "name": "SelectFpr", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "score_func", - "type": "Callable", - "hasDefault": true, - "default": "f_classif", - "limitation": null, - "ignored": false, - "docstring": "Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues). Default is f_classif (see below \"See Also\"). The default function only works with classification tasks." 
- }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "5e-2", - "limitation": null, - "ignored": false, - "docstring": "The highest p-value for features to be kept." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Filter: Select the pvalues below alpha based on a FPR test.\n\nFPR test stands for False Positive Rate test. It controls the total\namount of false detections.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues).\n Default is f_classif (see below \"See Also\"). The default function only\n works with classification tasks.\n\nalpha : float, default=5e-2\n The highest p-value for features to be kept.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.feature_selection import SelectFpr, chi2\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> X.shape\n(569, 30)\n>>> X_new = SelectFpr(chi2, alpha=0.01).fit_transform(X, y)\n>>> X_new.shape\n(569, 16)\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nmutual_info_classif: Mutual information for a discrete target.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a continuous target.\nSelectPercentile : Select features based on percentile of the highest\n scores.\nSelectKBest : Select features based on the k highest scores.\nSelectFdr : Select 
features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n mode." - }, - { - "name": "SelectFdr", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "score_func", - "type": "Callable", - "hasDefault": true, - "default": "f_classif", - "limitation": null, - "ignored": false, - "docstring": "Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues). Default is f_classif (see below \"See Also\"). The default function only works with classification tasks." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "5e-2", - "limitation": null, - "ignored": false, - "docstring": "The highest uncorrected p-value for features to keep." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Filter: Select the p-values for an estimated false discovery rate\n\nThis uses the Benjamini-Hochberg procedure. ``alpha`` is an upper bound\non the expected false discovery rate.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues).\n Default is f_classif (see below \"See Also\"). 
The default function only\n works with classification tasks.\n\nalpha : float, default=5e-2\n The highest uncorrected p-value for features to keep.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.feature_selection import SelectFdr, chi2\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> X.shape\n(569, 30)\n>>> X_new = SelectFdr(chi2, alpha=0.01).fit_transform(X, y)\n>>> X_new.shape\n(569, 16)\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores.\n\nReferences\n----------\nhttps://en.wikipedia.org/wiki/False_discovery_rate\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nmutual_info_classif : Mutual information for a discrete target.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a contnuous target.\nSelectPercentile : Select features based on percentile of the highest\n scores.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFwe : Select features based on family-wise error rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n mode." - }, - { - "name": "SelectFwe", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "score_func", - "type": "Callable", - "hasDefault": true, - "default": "f_classif", - "limitation": null, - "ignored": false, - "docstring": "Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues). Default is f_classif (see below \"See Also\"). The default function only works with classification tasks." 
- }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "5e-2", - "limitation": null, - "ignored": false, - "docstring": "The highest uncorrected p-value for features to keep." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Filter: Select the p-values corresponding to Family-wise error rate\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues).\n Default is f_classif (see below \"See Also\"). The default function only\n works with classification tasks.\n\nalpha : float, default=5e-2\n The highest uncorrected p-value for features to keep.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.feature_selection import SelectFwe, chi2\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> X.shape\n(569, 30)\n>>> X_new = SelectFwe(chi2, alpha=0.01).fit_transform(X, y)\n>>> X_new.shape\n(569, 15)\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores.\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nSelectPercentile : Select features based on percentile of the highest\n scores.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n mode." 
- }, - { - "name": "GenericUnivariateSelect", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "score_func", - "type": "Callable", - "hasDefault": true, - "default": "f_classif", - "limitation": null, - "ignored": false, - "docstring": "Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues). For modes 'percentile' or 'kbest' it can return a single array scores." - }, - { - "name": "mode", - "type": "Literal['percentile', 'k_best', 'fpr', 'fdr', 'fwe']", - "hasDefault": true, - "default": "'percentile'", - "limitation": null, - "ignored": false, - "docstring": "Feature selection mode." - }, - { - "name": "param", - "type": "float", - "hasDefault": true, - "default": "1e-5", - "limitation": null, - "ignored": false, - "docstring": "Parameter of the corresponding mode." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_selector", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Univariate feature selector with configurable strategy.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues). 
For modes 'percentile' or 'kbest' it can return\n a single array scores.\n\nmode : {'percentile', 'k_best', 'fpr', 'fdr', 'fwe'}, default='percentile'\n Feature selection mode.\n\nparam : float or int depending on the feature selection mode, default=1e-5\n Parameter of the corresponding mode.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores, None if `score_func` returned scores only.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.feature_selection import GenericUnivariateSelect, chi2\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> X.shape\n(569, 30)\n>>> transformer = GenericUnivariateSelect(chi2, mode='k_best', param=20)\n>>> X_new = transformer.fit_transform(X, y)\n>>> X_new.shape\n(569, 20)\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nmutual_info_classif : Mutual information for a discrete target.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a continuous target.\nSelectPercentile : Select features based on percentile of the highest\n scores.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate." - } - ], - "functions": [ - { - "name": "_clean_nans", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fixes Issue #1240: NaNs can't be properly compared, so change them to the\nsmallest value of scores's dtype. -inf seems to be unreliable." 
- }, - { - "name": "f_oneway", - "decorators": [], - "parameters": [ - { - "name": "*args", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "sample1, sample2... The sample measurements should be given as arguments." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Performs a 1-way ANOVA.\n\nThe one-way ANOVA tests the null hypothesis that 2 or more groups have\nthe same population mean. The test is applied to samples from two or\nmore groups, possibly with differing sizes.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n*args : array-like, sparse matrices\n sample1, sample2... The sample measurements should be given as\n arguments.\n\nReturns\n-------\nF-value : float\n The computed F-value of the test.\np-value : float\n The associated p-value from the F-distribution.\n\nNotes\n-----\nThe ANOVA test has important assumptions that must be satisfied in order\nfor the associated p-value to be valid.\n\n1. The samples are independent\n2. Each sample is from a normally distributed population\n3. The population standard deviations of the groups are all equal. This\n property is known as homoscedasticity.\n\nIf these assumptions are not true for a given set of data, it may still be\npossible to use the Kruskal-Wallis H-test (`scipy.stats.kruskal`_) although\nwith some loss of power.\n\nThe algorithm is from Heiman[2], pp.394-7.\n\nSee ``scipy.stats.f_oneway`` that should give the same results while\nbeing less efficient.\n\nReferences\n----------\n\n.. [1] Lowry, Richard. \"Concepts and Applications of Inferential\n Statistics\". Chapter 14.\n http://faculty.vassar.edu/lowry/ch14pt1.html\n\n.. [2] Heiman, G.W. Research Methods in Statistics. 2002." 
- }, - { - "name": "f_classif", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of regressors that will be tested sequentially." - }, - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the ANOVA F-value for the provided sample.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} shape = [n_samples, n_features]\n The set of regressors that will be tested sequentially.\n\ny : array of shape(n_samples)\n The data matrix.\n\nReturns\n-------\nF : array, shape = [n_features,]\n The set of F values.\n\npval : array, shape = [n_features,]\n The set of p-values.\n\nSee Also\n--------\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks." - }, - { - "name": "_chisquare", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fast replacement for scipy.stats.chisquare.\n\nVersion from https://github.com/scipy/scipy/pull/2525 with additional\noptimizations." - }, - { - "name": "chi2", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample vectors." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector (class labels)." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute chi-squared stats between each non-negative feature and class.\n\nThis score can be used to select the n_features features with the\nhighest values for the test chi-squared statistic from X, which must\ncontain only non-negative features such as booleans or frequencies\n(e.g., term counts in document classification), relative to the classes.\n\nRecall that the chi-square test measures dependence between stochastic\nvariables, so using this function \"weeds out\" the features that are the\nmost likely to be independent of class and therefore irrelevant for\nclassification.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Sample vectors.\n\ny : array-like of shape (n_samples,)\n Target vector (class labels).\n\nReturns\n-------\nchi2 : array, shape = (n_features,)\n chi2 statistics of each feature.\npval : array, shape = (n_features,)\n p-values of each feature.\n\nNotes\n-----\nComplexity of this algorithm is O(n_classes * n_features).\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nf_regression : F-value between label/feature for regression tasks." - }, - { - "name": "f_regression", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of regressors that will be tested sequentially." - }, - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix" - }, - { - "name": "center", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If true, X and y will be centered." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Univariate linear regression tests.\n\nLinear model for testing the individual effect of each of many regressors.\nThis is a scoring function to be used in a feature selection procedure, not\na free standing feature selection procedure.\n\nThis is done in 2 steps:\n\n1. The correlation between each regressor and the target is computed,\n that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) *\n std(y)).\n2. It is converted to an F score then to a p-value.\n\nFor more on usage see the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} shape = (n_samples, n_features)\n The set of regressors that will be tested sequentially.\n\ny : array of shape(n_samples).\n The data matrix\n\ncenter : bool, default=True\n If true, X and y will be centered.\n\nReturns\n-------\nF : array, shape=(n_features,)\n F values of features.\n\npval : array, shape=(n_features,)\n p-values of F-scores.\n\nSee Also\n--------\nmutual_info_regression : Mutual information for a continuous target.\nf_classif : ANOVA F-value between label/feature for classification tasks.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.\nSelectPercentile : Select features based on percentile of the highest\n scores." 
- } - ] - }, - { - "name": "sklearn.feature_selection._variance_threshold", - "imports": [ - "import numpy as np", - "from base import BaseEstimator", - "from _base import SelectorMixin", - "from utils.sparsefuncs import mean_variance_axis", - "from utils.sparsefuncs import min_max_axis", - "from utils.validation import check_is_fitted" - ], - "classes": [ - { - "name": "VarianceThreshold", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "threshold", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Features with a training-set variance lower than this threshold will be removed. The default is to keep all features with non-zero variance, i.e. remove the features that have the same value in all samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample vectors from which to compute variances." - }, - { - "name": "y", - "type": "Any", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored. This parameter exists only for compatibility with sklearn.pipeline.Pipeline." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn empirical variances from X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Sample vectors from which to compute variances.\n\ny : any, default=None\n Ignored. 
This parameter exists only for compatibility with\n sklearn.pipeline.Pipeline.\n\nReturns\n-------\nself" - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Feature selector that removes all low-variance features.\n\nThis feature selection algorithm looks only at the features (X), not the\ndesired outputs (y), and can thus be used for unsupervised learning.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nthreshold : float, default=0\n Features with a training-set variance lower than this threshold will\n be removed. The default is to keep all features with non-zero variance,\n i.e. remove the features that have the same value in all samples.\n\nAttributes\n----------\nvariances_ : array, shape (n_features,)\n Variances of individual features.\n\nNotes\n-----\nAllows NaN in the input.\nRaises ValueError if no feature in X meets the variance threshold.\n\nExamples\n--------\nThe following dataset has integer features, two of which are the same\nin every sample. 
These are removed with the default setting for threshold::\n\n >>> X = [[0, 2, 0, 3], [0, 1, 4, 3], [0, 1, 1, 3]]\n >>> selector = VarianceThreshold()\n >>> selector.fit_transform(X)\n array([[2, 0],\n [1, 4],\n [1, 1]])" - } - ], - "functions": [] - }, - { - "name": "sklearn.feature_selection", - "imports": [ - "from _univariate_selection import chi2", - "from _univariate_selection import f_classif", - "from _univariate_selection import f_oneway", - "from _univariate_selection import f_regression", - "from _univariate_selection import SelectPercentile", - "from _univariate_selection import SelectKBest", - "from _univariate_selection import SelectFpr", - "from _univariate_selection import SelectFdr", - "from _univariate_selection import SelectFwe", - "from _univariate_selection import GenericUnivariateSelect", - "from _variance_threshold import VarianceThreshold", - "from _rfe import RFE", - "from _rfe import RFECV", - "from _from_model import SelectFromModel", - "from _sequential import SequentialFeatureSelector", - "from _mutual_info import mutual_info_regression", - "from _mutual_info import mutual_info_classif", - "from _base import SelectorMixin" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.feature_selection.tests.test_base", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy import sparse as sp", - "from numpy.testing import assert_array_equal", - "from sklearn.base import BaseEstimator", - "from sklearn.feature_selection._base import SelectorMixin", - "from sklearn.utils import check_array" - ], - "classes": [ - { - "name": "StepSelector", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Retain every `step` features (beginning with 0)" - } - ], - "functions": [ - { - "name": "test_transform_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inverse_transform_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inverse_transform_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_selection.tests.test_chi2", - "imports": [ - "import warnings", - "import numpy as np", - "import pytest", - "from scipy.sparse import coo_matrix", - "from scipy.sparse import csr_matrix", - "import scipy.stats", - "from sklearn.feature_selection import SelectKBest", - "from sklearn.feature_selection import chi2", - "from sklearn.feature_selection._univariate_selection import _chisquare", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal" - ], - "classes": [], - "functions": [ - { - "name": "mkchi2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make k-best chi2 selector" - }, - { - "name": "test_chi2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_chi2_coo", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_chi2_negative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_chi2_unused_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_chisquare", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_selection.tests.test_feature_select", - "imports": [ - "import itertools", - "import warnings", - "import numpy as np", - "from scipy import stats", - "from scipy import sparse", - "import pytest", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils import safe_mask", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.feature_selection import chi2", - "from sklearn.feature_selection import f_classif", - "from sklearn.feature_selection import f_oneway", - "from sklearn.feature_selection import f_regression", - "from sklearn.feature_selection import mutual_info_classif", - "from sklearn.feature_selection import mutual_info_regression", - "from sklearn.feature_selection import SelectPercentile", - "from sklearn.feature_selection import SelectKBest", - "from sklearn.feature_selection import SelectFpr", - "from sklearn.feature_selection import SelectFdr", - "from sklearn.feature_selection import SelectFwe", - "from sklearn.feature_selection import GenericUnivariateSelect" - ], - "classes": [], - "functions": [ - { - "name": "test_f_oneway_vs_scipy_stats", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_f_oneway_ints", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_f_classif", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_f_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_f_regression_input_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_f_regression_center", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_f_classif_multi_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_percentile_classif", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_percentile_classif_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_kbest_classif", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_kbest_all", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_kbest_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_heuristics_classif", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_best_scores_kept", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_percentile_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_percentile_regression_full", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_percentile", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_kbest_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_heuristics_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_boundary_case_ch2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_fdr_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_fwe_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_selectkbest_tiebreaking", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_selectpercentile_tiebreaking", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tied_pvalues", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scorefunc_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "test_tied_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nans", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_func_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_k", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_f_classif_constant_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_feature_selected", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mutual_info_classif", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mutual_info_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_selection.tests.test_from_model", - "imports": [ - "import pytest", - "import numpy as np", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import skip_if_32bit", - "from sklearn import datasets", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import SGDClassifier", - "from sklearn.linear_model import Lasso", - "from sklearn.svm import LinearSVC", - "from sklearn.feature_selection import SelectFromModel", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from sklearn.ensemble 
import RandomForestClassifier", - "from sklearn.ensemble import HistGradientBoostingClassifier", - "from sklearn.linear_model import PassiveAggressiveClassifier", - "from sklearn.base import BaseEstimator", - "from sklearn.pipeline import make_pipeline", - "from sklearn.decomposition import PCA" - ], - "classes": [ - { - "name": "NaNTag", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoNaNTag", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NaNTagRandomForest", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "FixedImportanceEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_invalid_input", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_estimator_unchanged", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_features_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_features_dim", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_features_tiebreak", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_threshold_and_max_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_coef_default_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_2d_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calling_fit_reinitializes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prefit", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_threshold_string", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_threshold_without_refitting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_accepts_nan_inf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_accepts_nan_inf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_allow_nan_tag_comes_from_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pca_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_importance_getter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_selection.tests.test_mutual_info", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy.sparse import csr_matrix", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.feature_selection._mutual_info import _compute_mi", - "from sklearn.feature_selection import mutual_info_regression", - "from sklearn.feature_selection import mutual_info_classif" - ], - "classes": [], - "functions": [ - { - "name": "test_compute_mi_dd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_mi_cc", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_mi_cd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_mi_cd_unique_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mutual_info_classif_discrete", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mutual_info_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mutual_info_classif_mixed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mutual_info_options", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_selection.tests.test_rfe", - "imports": [ - "from operator import attrgetter", - "import pytest", - "import numpy as np", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal", - "from scipy import sparse", - "from sklearn.feature_selection import RFE", - "from sklearn.feature_selection import RFECV", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_friedman1", - "from sklearn.metrics import zero_one_loss", - "from sklearn.svm import SVC", - "from sklearn.svm import SVR", - "from sklearn.svm import LinearSVR", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.model_selection import cross_val_score", - "from sklearn.model_selection import GroupKFold", - "from sklearn.compose import TransformedTargetRegressor", - "from sklearn.pipeline import 
make_pipeline", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.metrics import make_scorer", - "from sklearn.metrics import get_scorer", - "from io import StringIO", - "import sys" - ], - "classes": [ - { - "name": "MockClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier to test recursive feature elimination" - } - ], - "functions": [ - { - "name": "test_rfe_features_importance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe_invalid_n_features_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_rfe_percent_n_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe_mockclassifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfecv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfecv_mockclassifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfecv_verbose_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfecv_grid_scores_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe_estimator_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe_min_step", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_number_of_subsets_of_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe_cv_n_jobs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe_cv_groups", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe_wrapped_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe_importance_getter_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_rfe_allow_nan_inf_in_x", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_w_pipeline_2d_coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_selection.tests.test_sequential", - "imports": [ - "import pytest", - "import scipy", - "import numpy as np", - "from numpy.testing import assert_array_equal", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.pipeline import make_pipeline", - "from sklearn.feature_selection import SequentialFeatureSelector", - "from sklearn.datasets import make_regression", - "from sklearn.linear_model import LinearRegression", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from sklearn.ensemble import HistGradientBoostingRegressor" - ], - "classes": [], - "functions": [ - { - "name": "test_bad_n_features_to_select", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bad_direction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_to_select", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_to_select_float", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sanity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_support", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nan_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_selection.tests.test_variance_threshold", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.utils._testing import assert_array_equal", - "from scipy.sparse import bsr_matrix", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from sklearn.feature_selection import VarianceThreshold" - ], - "classes": [], - "functions": [ - { - "name": "test_zero_variance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_variance_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_variance_floating_point_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_variance_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_selection.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.gaussian_process.kernels", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "from collections import namedtuple", - "import math", - "from inspect import signature", - "import numpy as np", - "from scipy.special import kv", - "from scipy.special import gamma", - "from scipy.spatial.distance import pdist", - "from scipy.spatial.distance import cdist", - "from scipy.spatial.distance import squareform", - "from 
metrics.pairwise import pairwise_kernels", - "from base import clone", - "from utils.validation import _num_samples", - "import warnings", - "from sklearn.exceptions import ConvergenceWarning" - ], - "classes": [ - { - "name": "Hyperparameter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__new__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__eq__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A kernel hyperparameter's specification in form of a namedtuple.\n\n.. versionadded:: 0.18\n\nAttributes\n----------\nname : str\n The name of the hyperparameter. Note that a kernel using a\n hyperparameter with name \"x\" must have the attributes self.x and\n self.x_bounds\n\nvalue_type : str\n The type of the hyperparameter. Currently, only \"numeric\"\n hyperparameters are supported.\n\nbounds : pair of floats >= 0 or \"fixed\"\n The lower and upper bound on the parameter. If n_elements>1, a pair\n of 1d array with n_elements each may be given alternatively. If\n the string \"fixed\" is passed as bounds, the hyperparameter's value\n cannot be changed.\n\nn_elements : int, default=1\n The number of elements of the hyperparameter value. Defaults to 1,\n which corresponds to a scalar hyperparameter. n_elements > 1\n corresponds to a hyperparameter which is vector-valued,\n such as, e.g., anisotropic length-scales.\n\nfixed : bool, default=None\n Whether the value of this hyperparameter is fixed, i.e., cannot be\n changed during hyperparameter tuning. 
If None is passed, the \"fixed\" is\n derived based on the given bounds.\n\nExamples\n--------\n>>> from sklearn.gaussian_process.kernels import ConstantKernel\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import Hyperparameter\n>>> X, y = make_friedman2(n_samples=50, noise=0, random_state=0)\n>>> kernel = ConstantKernel(constant_value=1.0,\n... constant_value_bounds=(0.0, 10.0))\n\nWe can access each hyperparameter:\n\n>>> for hyperparameter in kernel.hyperparameters:\n... print(hyperparameter)\nHyperparameter(name='constant_value', value_type='numeric',\nbounds=array([[ 0., 10.]]), n_elements=1, fixed=False)\n\n>>> params = kernel.get_params()\n>>> for key in sorted(params): print(f\"{key} : {params[key]}\")\nconstant_value : 1.0\nconstant_value_bounds : (0.0, 10.0)" - }, - { - "name": "Kernel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get parameters of this kernel.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values." 
- }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Set the parameters of this kernel.\n\nThe method works on simple kernels as well as on nested kernels.\nThe latter have parameters of the form ``__``\nso that it's possible to update each component of a nested object.\n\nReturns\n-------\nself" - }, - { - "name": "clone_with_theta", - "decorators": [], - "parameters": [ - { - "name": "theta", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The hyperparameters" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns a clone of self with given hyperparameters theta.\n\nParameters\n----------\ntheta : ndarray of shape (n_dims,)\n The hyperparameters" - }, - { - "name": "n_dims", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of non-fixed hyperparameters of the kernel." - }, - { - "name": "hyperparameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns a list of all hyperparameter specifications." 
- }, - { - "name": "theta", - "decorators": [], - "parameters": [ - { - "name": "theta", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The non-fixed, log-transformed hyperparameters of the kernel" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\nParameters\n----------\ntheta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel" - }, - { - "name": "bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the log-transformed bounds on the theta.\n\nReturns\n-------\nbounds : ndarray of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta" - }, - { - "name": "__add__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__radd__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__mul__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__rmul__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__pow__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__eq__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate the kernel." 
- }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples,)\n Left argument of the returned kernel k(X, Y)\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)" - }, - { - "name": "is_stationary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is stationary. " - }, - { - "name": "requires_vector_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is defined on fixed-length feature\nvectors or generic objects. Defaults to True for backward\ncompatibility." - }, - { - "name": "_check_bounds_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Called after fitting to warn if bounds may have been too tight." - } - ], - "docstring": "Base class for all kernels.\n\n.. 
versionadded:: 0.18" - }, - { - "name": "NormalizedKernelMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)" - } - ], - "docstring": "Mixin for kernels which are normalized: k(X, X)=1.\n\n.. versionadded:: 0.18" - }, - { - "name": "StationaryKernelMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "is_stationary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is stationary. " - } - ], - "docstring": "Mixin for kernels which are stationary: k(X, Y)= f(X-Y).\n\n.. 
versionadded:: 0.18" - }, - { - "name": "GenericKernelMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "requires_vector_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Whether the kernel works only on fixed-length feature vectors." - } - ], - "docstring": "Mixin for kernels which operate on generic objects such as variable-\nlength sequences, trees, and graphs.\n\n.. versionadded:: 0.22" - }, - { - "name": "CompoundKernel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernels", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The other kernels" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get parameters of this kernel.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values." 
- }, - { - "name": "theta", - "decorators": [], - "parameters": [ - { - "name": "theta", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The non-fixed, log-transformed hyperparameters of the kernel" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\nParameters\n----------\ntheta : array of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel" - }, - { - "name": "bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the log-transformed bounds on the theta.\n\nReturns\n-------\nbounds : array of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta" - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) is evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nNote that this compound kernel returns the results of all simple kernel\nstacked along an additional axis.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object, default=None\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_X, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of the\n kernel hyperparameter is computed.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y, n_kernels)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims, n_kernels), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True." - }, - { - "name": "__eq__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "is_stationary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is stationary. " - }, - { - "name": "requires_vector_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is defined on discrete structures. " - }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to the kernel." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to `np.diag(self(X))`; however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X, n_kernels)\n Diagonal of kernel k(X, X)" - } - ], - "docstring": "Kernel which is composed of a set of other kernels.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernels : list of Kernels\n The other kernels\n\nExamples\n--------\n>>> from sklearn.gaussian_process.kernels import WhiteKernel\n>>> from sklearn.gaussian_process.kernels import RBF\n>>> from sklearn.gaussian_process.kernels import CompoundKernel\n>>> kernel = CompoundKernel(\n... [WhiteKernel(noise_level=3.0), RBF(length_scale=2.0)])\n>>> print(kernel.bounds)\n[[-11.51292546 11.51292546]\n [-11.51292546 11.51292546]]\n>>> print(kernel.n_dims)\n2\n>>> print(kernel.theta)\n[1.09861229 0.69314718]" - }, - { - "name": "KernelOperator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get parameters of this kernel.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values." - }, - { - "name": "hyperparameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns a list of all hyperparameter." - }, - { - "name": "theta", - "decorators": [], - "parameters": [ - { - "name": "theta", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The non-fixed, log-transformed hyperparameters of the kernel" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\nParameters\n----------\ntheta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel" - }, - { - "name": "bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the log-transformed bounds on the theta.\n\nReturns\n-------\nbounds : ndarray of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta" - }, - { - "name": "__eq__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "is_stationary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is stationary. " - }, - { - "name": "requires_vector_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is stationary. " - } - ], - "docstring": "Base class for all kernel operators.\n\n.. 
versionadded:: 0.18" - }, - { - "name": "Sum", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "k1", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The first base-kernel of the sum-kernel" - }, - { - "name": "k2", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The second base-kernel of the sum-kernel" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) is evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_X, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). 
If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True." - }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to the kernel." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to `np.diag(self(X))`; however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "The `Sum` kernel takes two kernels :math:`k_1` and :math:`k_2`\nand combines them via\n\n.. math::\n k_{sum}(X, Y) = k_1(X, Y) + k_2(X, Y)\n\nNote that the `__add__` magic method is overridden, so\n`Sum(RBF(), RBF())` is equivalent to using the + operator\nwith `RBF() + RBF()`.\n\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18\n\nParameters\n----------\nk1 : Kernel\n The first base-kernel of the sum-kernel\n\nk2 : Kernel\n The second base-kernel of the sum-kernel\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import RBF, Sum, ConstantKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = Sum(ConstantKernel(2), RBF())\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n1.0\n>>> kernel\n1.41**2 + RBF(length_scale=1)" - }, - { - "name": "Product", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "k1", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The first base-kernel of the product-kernel" - }, - { - "name": "k2", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The second base-kernel of the product-kernel" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) is evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_Y, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True." - }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to the kernel." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "The `Product` kernel takes two kernels :math:`k_1` and :math:`k_2`\nand combines them via\n\n.. 
math::\n k_{prod}(X, Y) = k_1(X, Y) * k_2(X, Y)\n\nNote that the `__mul__` magic method is overridden, so\n`Product(RBF(), RBF())` is equivalent to using the * operator\nwith `RBF() * RBF()`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nk1 : Kernel\n The first base-kernel of the product-kernel\n\nk2 : Kernel\n The second base-kernel of the product-kernel\n\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import (RBF, Product,\n... ConstantKernel)\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = Product(ConstantKernel(2), RBF())\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n1.0\n>>> kernel\n1.41**2 * RBF(length_scale=1)" - }, - { - "name": "Exponentiation", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base kernel" - }, - { - "name": "exponent", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The exponent for the base kernel" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get parameters of this kernel.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values." - }, - { - "name": "hyperparameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns a list of all hyperparameter." - }, - { - "name": "theta", - "decorators": [], - "parameters": [ - { - "name": "theta", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The non-fixed, log-transformed hyperparameters of the kernel" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\nParameters\n----------\ntheta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel" - }, - { - "name": "bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the log-transformed bounds on the theta.\n\nReturns\n-------\nbounds : ndarray of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta" - }, - { - "name": "__eq__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). 
If None, k(X, X) is evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_Y, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True." - }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to the kernel." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "is_stationary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is stationary. " - }, - { - "name": "requires_vector_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is defined on discrete structures. " - } - ], - "docstring": "The Exponentiation kernel takes one base kernel and a scalar parameter\n:math:`p` and combines them via\n\n.. math::\n k_{exp}(X, Y) = k(X, Y) ^p\n\nNote that the `__pow__` magic method is overridden, so\n`Exponentiation(RBF(), 2)` is equivalent to using the ** operator\nwith `RBF() ** 2`.\n\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernel : Kernel\n The base kernel\n\nexponent : float\n The exponent for the base kernel\n\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import (RationalQuadratic,\n... Exponentiation)\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = Exponentiation(RationalQuadratic(), exponent=2)\n>>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,\n... 
random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.419...\n>>> gpr.predict(X[:1,:], return_std=True)\n(array([635.5...]), array([0.559...]))" - }, - { - "name": "ConstantKernel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "constant_value", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The constant value which defines the covariance: k(x_1, x_2) = constant_value" - }, - { - "name": "constant_value_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on `constant_value`. If set to \"fixed\", `constant_value` cannot be changed during hyperparameter tuning." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_constant_value", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) is evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed. Only supported when Y is None." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_X, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when eval_gradient\n is True." - }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to the kernel." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Constant kernel.\n\nCan be used as part of a product-kernel where it scales the magnitude of\nthe other factor (kernel) or as part of a sum-kernel, where it modifies\nthe mean of the Gaussian process.\n\n.. math::\n k(x_1, x_2) = constant\\_value \\;\\forall\\; x_1, x_2\n\nAdding a constant kernel is equivalent to adding a constant::\n\n kernel = RBF() + ConstantKernel(constant_value=2)\n\nis the same as::\n\n kernel = RBF() + 2\n\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nconstant_value : float, default=1.0\n The constant value which defines the covariance:\n k(x_1, x_2) = constant_value\n\nconstant_value_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on `constant_value`.\n If set to \"fixed\", `constant_value` cannot be changed during\n hyperparameter tuning.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import RBF, ConstantKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = RBF() + ConstantKernel(constant_value=2)\n>>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,\n... 
random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3696...\n>>> gpr.predict(X[:1,:], return_std=True)\n(array([606.1...]), array([0.24...]))" - }, - { - "name": "WhiteKernel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "noise_level", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Parameter controlling the noise level (variance)" - }, - { - "name": "noise_level_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'noise_level'. If set to \"fixed\", 'noise_level' cannot be changed during hyperparameter tuning." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_noise_level", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) is evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed. Only supported when Y is None." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_X, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when eval_gradient\n is True." - }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to the kernel." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "White kernel.\n\nThe main use-case of this kernel is as part of a sum-kernel where it\nexplains the noise of the signal as independently and identically\nnormally-distributed. 
The parameter noise_level equals the variance of this\nnoise.\n\n.. math::\n k(x_1, x_2) = noise\\_level \\text{ if } x_i == x_j \\text{ else } 0\n\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nnoise_level : float, default=1.0\n Parameter controlling the noise level (variance)\n\nnoise_level_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'noise_level'.\n If set to \"fixed\", 'noise_level' cannot be changed during\n hyperparameter tuning.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = DotProduct() + WhiteKernel(noise_level=0.5)\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3680...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([653.0..., 592.1... ]), array([316.6..., 316.6...]))" - }, - { - "name": "RBF", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "length_scale", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The length scale of the kernel. If a float, an isotropic kernel is used. If an array, an anisotropic kernel is used where each dimension of l defines the length-scale of the respective feature dimension." - }, - { - "name": "length_scale_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'length_scale'. If set to \"fixed\", 'length_scale' cannot be changed during hyperparameter tuning." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "anisotropic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_length_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) if evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed. Only supported when Y is None." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. 
Only returned when `eval_gradient`\n is True." - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Radial-basis function kernel (aka squared-exponential kernel).\n\nThe RBF kernel is a stationary kernel. It is also known as the\n\"squared exponential\" kernel. It is parameterized by a length scale\nparameter :math:`l>0`, which can either be a scalar (isotropic variant\nof the kernel) or a vector with the same number of dimensions as the inputs\nX (anisotropic variant of the kernel). The kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\exp\\left(- \\frac{d(x_i, x_j)^2}{2l^2} \\right)\n\nwhere :math:`l` is the length scale of the kernel and\n:math:`d(\\cdot,\\cdot)` is the Euclidean distance.\nFor advice on how to set the length scale parameter, see e.g. [1]_.\n\nThis kernel is infinitely differentiable, which implies that GPs with this\nkernel as covariance function have mean square derivatives of all orders,\nand are thus very smooth.\nSee [2]_, Chapter 4, Section 4.2, for further details of the RBF kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nlength_scale : float or ndarray of shape (n_features,), default=1.0\n The length scale of the kernel. If a float, an isotropic kernel is\n used. If an array, an anisotropic kernel is used where each dimension\n of l defines the length-scale of the respective feature dimension.\n\nlength_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\nReferences\n----------\n.. [1] `David Duvenaud (2014). \"The Kernel Cookbook:\n Advice on Covariance functions\".\n `_\n\n.. [2] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n \"Gaussian Processes for Machine Learning\". 
The MIT Press.\n `_\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import RBF\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = 1.0 * RBF(1.0)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9866...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.8354..., 0.03228..., 0.1322...],\n [0.7906..., 0.0652..., 0.1441...]])" - }, - { - "name": "Matern", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "length_scale", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The length scale of the kernel. If a float, an isotropic kernel is used. If an array, an anisotropic kernel is used where each dimension of l defines the length-scale of the respective feature dimension." - }, - { - "name": "length_scale_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'length_scale'. If set to \"fixed\", 'length_scale' cannot be changed during hyperparameter tuning." - }, - { - "name": "nu", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The parameter nu controlling the smoothness of the learned function. The smaller nu, the less smooth the approximated function is. For nu=inf, the kernel becomes equivalent to the RBF kernel and for nu=0.5 to the absolute exponential kernel. Important intermediate values are nu=1.5 (once differentiable functions) and nu=2.5 (twice differentiable functions). Note that values of nu not in [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost (appr. 
10 times higher) since they require to evaluate the modified Bessel function. Furthermore, in contrast to l, nu is kept fixed to its initial value and not optimized." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) if evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed. Only supported when Y is None." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True." 
- }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Matern kernel.\n\nThe class of Matern kernels is a generalization of the :class:`RBF`.\nIt has an additional parameter :math:`\\nu` which controls the\nsmoothness of the resulting function. The smaller :math:`\\nu`,\nthe less smooth the approximated function is.\nAs :math:`\\nu\\rightarrow\\infty`, the kernel becomes equivalent to\nthe :class:`RBF` kernel. When :math:`\\nu = 1/2`, the Mat\u00e9rn kernel\nbecomes identical to the absolute exponential kernel.\nImportant intermediate values are\n:math:`\\nu=1.5` (once differentiable functions)\nand :math:`\\nu=2.5` (twice differentiable functions).\n\nThe kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\frac{1}{\\Gamma(\\nu)2^{\\nu-1}}\\Bigg(\n \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\n \\Bigg)^\\nu K_\\nu\\Bigg(\n \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\\Bigg)\n\n\n\nwhere :math:`d(\\cdot,\\cdot)` is the Euclidean distance,\n:math:`K_{\\nu}(\\cdot)` is a modified Bessel function and\n:math:`\\Gamma(\\cdot)` is the gamma function.\nSee [1]_, Chapter 4, Section 4.2, for details regarding the different\nvariants of the Matern kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nlength_scale : float or ndarray of shape (n_features,), default=1.0\n The length scale of the kernel. If a float, an isotropic kernel is\n used. 
If an array, an anisotropic kernel is used where each dimension\n of l defines the length-scale of the respective feature dimension.\n\nlength_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\nnu : float, default=1.5\n The parameter nu controlling the smoothness of the learned function.\n The smaller nu, the less smooth the approximated function is.\n For nu=inf, the kernel becomes equivalent to the RBF kernel and for\n nu=0.5 to the absolute exponential kernel. Important intermediate\n values are nu=1.5 (once differentiable functions) and nu=2.5\n (twice differentiable functions). Note that values of nu not in\n [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost\n (appr. 10 times higher) since they require to evaluate the modified\n Bessel function. Furthermore, in contrast to l, nu is kept fixed to\n its initial value and not optimized.\n\nReferences\n----------\n.. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n \"Gaussian Processes for Machine Learning\". The MIT Press.\n `_\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import Matern\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = 1.0 * Matern(length_scale=1.0, nu=1.5)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... 
random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9866...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.8513..., 0.0368..., 0.1117...],\n [0.8086..., 0.0693..., 0.1220...]])" - }, - { - "name": "RationalQuadratic", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "length_scale", - "type": null, - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The length scale of the kernel." - }, - { - "name": "alpha", - "type": null, - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Scale mixture parameter" - }, - { - "name": "length_scale_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'length_scale'. If set to \"fixed\", 'length_scale' cannot be changed during hyperparameter tuning." - }, - { - "name": "alpha_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'alpha'. If set to \"fixed\", 'alpha' cannot be changed during hyperparameter tuning." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_length_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_alpha", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) if evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed. Only supported when Y is None." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims)\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. 
Only returned when eval_gradient\n is True." - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Rational Quadratic kernel.\n\nThe RationalQuadratic kernel can be seen as a scale mixture (an infinite\nsum) of RBF kernels with different characteristic length scales. It is\nparameterized by a length scale parameter :math:`l>0` and a scale\nmixture parameter :math:`\\alpha>0`. Only the isotropic variant\nwhere length_scale :math:`l` is a scalar is supported at the moment.\nThe kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\left(\n 1 + \\frac{d(x_i, x_j)^2 }{ 2\\alpha l^2}\\right)^{-\\alpha}\n\nwhere :math:`\\alpha` is the scale mixture parameter, :math:`l` is\nthe length scale of the kernel and :math:`d(\\cdot,\\cdot)` is the\nEuclidean distance.\nFor advice on how to set the parameters, see e.g. [1]_.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nlength_scale : float > 0, default=1.0\n The length scale of the kernel.\n\nalpha : float > 0, default=1.0\n Scale mixture parameter\n\nlength_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\nalpha_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'alpha'.\n If set to \"fixed\", 'alpha' cannot be changed during\n hyperparameter tuning.\n\nReferences\n----------\n.. [1] `David Duvenaud (2014). 
\"The Kernel Cookbook:\n Advice on Covariance functions\".\n `_\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import Matern\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = RationalQuadratic(length_scale=1.0, alpha=1.5)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9733...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.8881..., 0.0566..., 0.05518...],\n [0.8678..., 0.0707... , 0.0614...]])" - }, - { - "name": "ExpSineSquared", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "length_scale", - "type": null, - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The length scale of the kernel." - }, - { - "name": "periodicity", - "type": null, - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The periodicity of the kernel." - }, - { - "name": "length_scale_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'length_scale'. If set to \"fixed\", 'length_scale' cannot be changed during hyperparameter tuning." - }, - { - "name": "periodicity_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'periodicity'. If set to \"fixed\", 'periodicity' cannot be changed during hyperparameter tuning." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_length_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the length scale" - }, - { - "name": "hyperparameter_periodicity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) if evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed. Only supported when Y is None." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). 
If None, k(X, X)\n if evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True." - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Exp-Sine-Squared kernel (aka periodic kernel).\n\nThe ExpSineSquared kernel allows one to model functions which repeat\nthemselves exactly. It is parameterized by a length scale\nparameter :math:`l>0` and a periodicity parameter :math:`p>0`.\nOnly the isotropic variant where :math:`l` is a scalar is\nsupported at the moment. The kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\text{exp}\\left(-\n \\frac{ 2\\sin^2(\\pi d(x_i, x_j)/p) }{ l^ 2} \\right)\n\nwhere :math:`l` is the length scale of the kernel, :math:`p` the\nperiodicity of the kernel and :math:`d(\\\\cdot,\\\\cdot)` is the\nEuclidean distance.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18\n\nParameters\n----------\n\nlength_scale : float > 0, default=1.0\n The length scale of the kernel.\n\nperiodicity : float > 0, default=1.0\n The periodicity of the kernel.\n\nlength_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\nperiodicity_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'periodicity'.\n If set to \"fixed\", 'periodicity' cannot be changed during\n hyperparameter tuning.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import ExpSineSquared\n>>> X, y = make_friedman2(n_samples=50, noise=0, random_state=0)\n>>> kernel = ExpSineSquared(length_scale=1, periodicity=1)\n>>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.0144...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([425.6..., 457.5...]), array([0.3894..., 0.3467...]))" - }, - { - "name": "DotProduct", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "sigma_0", - "type": null, - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Parameter controlling the inhomogenity of the kernel. If sigma_0=0, the kernel is homogenous." - }, - { - "name": "sigma_0_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'sigma_0'. If set to \"fixed\", 'sigma_0' cannot be changed during hyperparameter tuning." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_sigma_0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) if evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed. Only supported when Y is None." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True." 
- }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y).\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)." - }, - { - "name": "is_stationary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is stationary. " - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dot-Product kernel.\n\nThe DotProduct kernel is non-stationary and can be obtained from linear\nregression by putting :math:`N(0, 1)` priors on the coefficients\nof :math:`x_d (d = 1, . . . , D)` and a prior of :math:`N(0, \\sigma_0^2)`\non the bias. The DotProduct kernel is invariant to a rotation of\nthe coordinates about the origin, but not translations.\nIt is parameterized by a parameter sigma_0 :math:`\\sigma`\nwhich controls the inhomogenity of the kernel. For :math:`\\sigma_0^2 =0`,\nthe kernel is called the homogeneous linear kernel, otherwise\nit is inhomogeneous. The kernel is given by\n\n.. math::\n k(x_i, x_j) = \\sigma_0 ^ 2 + x_i \\cdot x_j\n\nThe DotProduct kernel is commonly combined with exponentiation.\n\nSee [1]_, Chapter 4, Section 4.2, for further details regarding the\nDotProduct kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18\n\nParameters\n----------\nsigma_0 : float >= 0, default=1.0\n Parameter controlling the inhomogenity of the kernel. If sigma_0=0,\n the kernel is homogenous.\n\nsigma_0_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'sigma_0'.\n If set to \"fixed\", 'sigma_0' cannot be changed during\n hyperparameter tuning.\n\nReferences\n----------\n.. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n \"Gaussian Processes for Machine Learning\". The MIT Press.\n `_\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = DotProduct() + WhiteKernel()\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3680...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([653.0..., 592.1...]), array([316.6..., 316.6...]))" - }, - { - "name": "PairwiseKernel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Parameter gamma of the pairwise kernel specified by metric. It should be positive." - }, - { - "name": "gamma_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'gamma'. If set to \"fixed\", 'gamma' cannot be changed during hyperparameter tuning." 
- }, - { - "name": "metric", - "type": "Literal[\"linear\", \"additive_chi2\", \"chi2\", \"poly\", \"polynomial\", \"rbf\", \"laplacian\", \"sigmoid\", \"cosine\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating kernel between instances in a feature array. If metric is a string, it must be one of the metrics in pairwise.PAIRWISE_KERNEL_FUNCTIONS. If metric is \"precomputed\", X is assumed to be a kernel matrix. Alternatively, if metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays from X as input and return a value indicating the distance between them." - }, - { - "name": "pairwise_kernels_kwargs", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "All entries of this dict (if any) are passed as keyword arguments to the pairwise kernel function." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_gamma", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) if evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed. 
Only supported when Y is None." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True." - }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)" - }, - { - "name": "is_stationary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is stationary. 
" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Wrapper for kernels in sklearn.metrics.pairwise.\n\nA thin wrapper around the functionality of the kernels in\nsklearn.metrics.pairwise.\n\nNote: Evaluation of eval_gradient is not analytic but numeric and all\n kernels support only isotropic distances. The parameter gamma is\n considered to be a hyperparameter and may be optimized. The other\n kernel parameters are set directly at initialization and are kept\n fixed.\n\n.. versionadded:: 0.18\n\nParameters\n----------\ngamma : float, default=1.0\n Parameter gamma of the pairwise kernel specified by metric. It should\n be positive.\n\ngamma_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'gamma'.\n If set to \"fixed\", 'gamma' cannot be changed during\n hyperparameter tuning.\n\nmetric : {\"linear\", \"additive_chi2\", \"chi2\", \"poly\", \"polynomial\", \"rbf\", \"laplacian\", \"sigmoid\", \"cosine\"} or callable, default=\"linear\"\n The metric to use when calculating kernel between instances in a\n feature array. If metric is a string, it must be one of the metrics\n in pairwise.PAIRWISE_KERNEL_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a kernel matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. 
The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\npairwise_kernels_kwargs : dict, default=None\n All entries of this dict (if any) are passed as keyword arguments to\n the pairwise kernel function.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import PairwiseKernel\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = PairwiseKernel(metric='rbf')\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9733...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.8880..., 0.05663..., 0.05532...],\n [0.8676..., 0.07073..., 0.06165...]])" - } - ], - "functions": [ - { - "name": "_check_length_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_approx_fprime", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.gaussian_process._gpc", - "imports": [ - "from operator import itemgetter", - "import numpy as np", - "from scipy.linalg import cholesky", - "from scipy.linalg import cho_solve", - "from scipy.linalg import solve", - "import scipy.optimize", - "from scipy.special import erf", - "from scipy.special import expit", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from base import clone", - "from kernels import RBF", - "from kernels import CompoundKernel", - "from kernels import ConstantKernel as C", - "from utils.validation import check_is_fitted", - "from utils.validation import check_array", - "from utils import check_random_state", - "from utils.optimize import _check_optimize_result", - "from preprocessing import LabelEncoder", - "from multiclass import OneVsRestClassifier", - "from multiclass import OneVsOneClassifier", - 
"from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "_BinaryGaussianProcessClassifierLaplace", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The kernel specifying the covariance function of the GP. If None is passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that the kernel's hyperparameters are optimized during fitting." - }, - { - "name": "optimizer", - "type": "Union[Callable, Literal['fmin_l_bfgs_b']]", - "hasDefault": true, - "default": "'fmin_l_bfgs_b'", - "limitation": null, - "ignored": false, - "docstring": "Can either be one of the internally supported optimizers for optimizing the kernel's parameters, specified by a string, or an externally defined optimizer passed as a callable. If a callable is passed, it must have the signature:: def optimizer(obj_func, initial_theta, bounds): # * 'obj_func' is the objective function to be maximized, which # takes the hyperparameters theta as parameter and an # optional flag eval_gradient, which determines if the # gradient is returned additionally to the function value # * 'initial_theta': the initial value for theta, which can be # used by local optimizers # * 'bounds': the bounds on the values of theta .... # Returned are the best found hyperparameters theta and # the corresponding value of the target function. return theta_opt, func_min Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize is used. If None is passed, the kernel's parameters are kept fixed. 
Available internal optimizers are:: 'fmin_l_bfgs_b'" - }, - { - "name": "n_restarts_optimizer", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The number of restarts of the optimizer for finding the kernel's parameters which maximize the log-marginal likelihood. The first run of the optimizer is performed from the kernel's initial parameters, the remaining ones (if any) from thetas sampled log-uniform randomly from the space of allowed theta-values. If greater than 0, all bounds must be finite. Note that n_restarts_optimizer=0 implies that one run is performed." - }, - { - "name": "max_iter_predict", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations in Newton's method for approximating the posterior during predict. Smaller values will reduce computation time at the cost of worse results." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If warm-starts are enabled, the solution of the last Newton iteration on the Laplace approximation of the posterior mode is used as initialization for the next call of _posterior_mode(). This can speed up convergence when _posterior_mode is called several times on similar problems as in hyperparameter optimization. See :term:`the Glossary `." - }, - { - "name": "copy_X_train", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, a persistent copy of the training data is stored in the object. Otherwise, just a reference to the training data is stored, which might cause predictions to change if the data is modified externally." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation used to initialize the centers. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Feature vectors or other representations of training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values, must be binary" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Gaussian process classification model\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data.\n\ny : array-like of shape (n_samples,)\n Target values, must be binary\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Query points where the GP is evaluated for classification." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on an array of test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n Predicted target values for X, values are from ``classes_``" - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Query points where the GP is evaluated for classification." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return probability estimates for the test vector X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\nReturns\n-------\nC : array-like of shape (n_samples, n_classes)\n Returns the probability of the samples for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute ``classes_``." - }, - { - "name": "log_marginal_likelihood", - "decorators": [], - "parameters": [ - { - "name": "theta", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Kernel hyperparameters for which the log-marginal likelihood is evaluated. If None, the precomputed log_marginal_likelihood of ``self.kernel_.theta`` is returned." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the gradient of the log-marginal likelihood with respect to the kernel hyperparameters at position theta is returned additionally. If True, theta must not be None." 
- }, - { - "name": "clone_kernel", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, the kernel attribute is copied. If False, the kernel attribute is modified, but may result in a performance improvement." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns log-marginal likelihood of theta for training data.\n\nParameters\n----------\ntheta : array-like of shape (n_kernel_params,), default=None\n Kernel hyperparameters for which the log-marginal likelihood is\n evaluated. If None, the precomputed log_marginal_likelihood\n of ``self.kernel_.theta`` is returned.\n\neval_gradient : bool, default=False\n If True, the gradient of the log-marginal likelihood with respect\n to the kernel hyperparameters at position theta is returned\n additionally. If True, theta must not be None.\n\nclone_kernel : bool, default=True\n If True, the kernel attribute is copied. If False, the kernel\n attribute is modified, but may result in a performance improvement.\n\nReturns\n-------\nlog_likelihood : float\n Log-marginal likelihood of theta for training data.\n\nlog_likelihood_gradient : ndarray of shape (n_kernel_params,), optional\n Gradient of the log-marginal likelihood with respect to the kernel\n hyperparameters at position theta.\n Only returned when `eval_gradient` is True." - }, - { - "name": "_posterior_mode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mode-finding for binary Laplace GPC and fixed kernel.\n\nThis approximates the posterior of the latent function values for given\ninputs and target observations with a Gaussian approximation and uses\nNewton's iteration to find the mode of this approximation." 
- }, - { - "name": "_constrained_optimization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Binary Gaussian process classification based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 of\n``Gaussian Processes for Machine Learning'' (GPML) by Rasmussen and\nWilliams.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernel : kernel instance, default=None\n The kernel specifying the covariance function of the GP. If None is\n passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\n the kernel's hyperparameters are optimized during fitting.\n\noptimizer : 'fmin_l_bfgs_b' or callable, default='fmin_l_bfgs_b'\n Can either be one of the internally supported optimizers for optimizing\n the kernel's parameters, specified by a string, or an externally\n defined optimizer passed as a callable. If a callable is passed, it\n must have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func' is the objective function to be maximized, which\n # takes the hyperparameters theta as parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\n Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n is used. 
If None is passed, the kernel's parameters are kept fixed.\n Available internal optimizers are::\n\n 'fmin_l_bfgs_b'\n\nn_restarts_optimizer : int, default=0\n The number of restarts of the optimizer for finding the kernel's\n parameters which maximize the log-marginal likelihood. The first run\n of the optimizer is performed from the kernel's initial parameters,\n the remaining ones (if any) from thetas sampled log-uniform randomly\n from the space of allowed theta-values. If greater than 0, all bounds\n must be finite. Note that n_restarts_optimizer=0 implies that one\n run is performed.\n\nmax_iter_predict : int, default=100\n The maximum number of iterations in Newton's method for approximating\n the posterior during predict. Smaller values will reduce computation\n time at the cost of worse results.\n\nwarm_start : bool, default=False\n If warm-starts are enabled, the solution of the last Newton iteration\n on the Laplace approximation of the posterior mode is used as\n initialization for the next call of _posterior_mode(). This can speed\n up convergence when _posterior_mode is called several times on similar\n problems as in hyperparameter optimization. See :term:`the Glossary\n `.\n\ncopy_X_train : bool, default=True\n If True, a persistent copy of the training data is stored in the\n object. 
Otherwise, just a reference to the training data is stored,\n which might cause predictions to change if the data is modified\n externally.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nAttributes\n----------\nX_train_ : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data (also\n required for prediction).\n\ny_train_ : array-like of shape (n_samples,)\n Target values in training data (also required for prediction)\n\nclasses_ : array-like of shape (n_classes,)\n Unique class labels.\n\nkernel_ : kernl instance\n The kernel used for prediction. The structure of the kernel is the\n same as the one passed as parameter but with optimized hyperparameters\n\nL_ : array-like of shape (n_samples, n_samples)\n Lower-triangular Cholesky decomposition of the kernel in X_train_\n\npi_ : array-like of shape (n_samples,)\n The probabilities of the positive class for the training points\n X_train_\n\nW_sr_ : array-like of shape (n_samples,)\n Square root of W, the Hessian of log-likelihood of the latent function\n values for the observed labels. Since W is diagonal, only the diagonal\n of sqrt(W) is stored.\n\nlog_marginal_likelihood_value_ : float\n The log-marginal-likelihood of ``self.kernel_.theta``" - }, - { - "name": "GaussianProcessClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The kernel specifying the covariance function of the GP. If None is passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that the kernel's hyperparameters are optimized during fitting." 
- }, - { - "name": "optimizer", - "type": "Union[Callable, Literal['fmin_l_bfgs_b']]", - "hasDefault": true, - "default": "'fmin_l_bfgs_b'", - "limitation": null, - "ignored": false, - "docstring": "Can either be one of the internally supported optimizers for optimizing the kernel's parameters, specified by a string, or an externally defined optimizer passed as a callable. If a callable is passed, it must have the signature:: def optimizer(obj_func, initial_theta, bounds): # * 'obj_func' is the objective function to be maximized, which # takes the hyperparameters theta as parameter and an # optional flag eval_gradient, which determines if the # gradient is returned additionally to the function value # * 'initial_theta': the initial value for theta, which can be # used by local optimizers # * 'bounds': the bounds on the values of theta .... # Returned are the best found hyperparameters theta and # the corresponding value of the target function. return theta_opt, func_min Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize is used. If None is passed, the kernel's parameters are kept fixed. Available internal optimizers are:: 'fmin_l_bfgs_b'" - }, - { - "name": "n_restarts_optimizer", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The number of restarts of the optimizer for finding the kernel's parameters which maximize the log-marginal likelihood. The first run of the optimizer is performed from the kernel's initial parameters, the remaining ones (if any) from thetas sampled log-uniform randomly from the space of allowed theta-values. If greater than 0, all bounds must be finite. Note that n_restarts_optimizer=0 implies that one run is performed." 
- }, - { - "name": "max_iter_predict", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations in Newton's method for approximating the posterior during predict. Smaller values will reduce computation time at the cost of worse results." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If warm-starts are enabled, the solution of the last Newton iteration on the Laplace approximation of the posterior mode is used as initialization for the next call of _posterior_mode(). This can speed up convergence when _posterior_mode is called several times on similar problems as in hyperparameter optimization. See :term:`the Glossary `." - }, - { - "name": "copy_X_train", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, a persistent copy of the training data is stored in the object. Otherwise, just a reference to the training data is stored, which might cause predictions to change if the data is modified externally." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation used to initialize the centers. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - }, - { - "name": "multi_class", - "type": "Literal['one_vs_rest', 'one_vs_one']", - "hasDefault": true, - "default": "'one_vs_rest'", - "limitation": null, - "ignored": false, - "docstring": "Specifies how multi-class classification problems are handled. Supported are 'one_vs_rest' and 'one_vs_one'. In 'one_vs_rest', one binary Gaussian process classifier is fitted for each class, which is trained to separate this class from the rest. 
In 'one_vs_one', one binary Gaussian process classifier is fitted for each pair of classes, which is trained to separate these two classes. The predictions of these binary predictors are combined into multi-class predictions. Note that 'one_vs_one' does not support predicting probability estimates." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation: the specified multiclass problems are computed in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Feature vectors or other representations of training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values, must be binary" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Gaussian process classification model\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data.\n\ny : array-like of shape (n_samples,)\n Target values, must be binary\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Query points where the GP is evaluated for classification." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on an array of test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n Predicted target values for X, values are from ``classes_``" - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Query points where the GP is evaluated for classification." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return probability estimates for the test vector X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\nReturns\n-------\nC : array-like of shape (n_samples, n_classes)\n Returns the probability of the samples for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`." - }, - { - "name": "kernel_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "log_marginal_likelihood", - "decorators": [], - "parameters": [ - { - "name": "theta", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Kernel hyperparameters for which the log-marginal likelihood is evaluated. In the case of multi-class classification, theta may be the hyperparameters of the compound kernel or of an individual kernel. In the latter case, all individual kernel get assigned the same theta values. If None, the precomputed log_marginal_likelihood of ``self.kernel_.theta`` is returned." 
- }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the gradient of the log-marginal likelihood with respect to the kernel hyperparameters at position theta is returned additionally. Note that gradient computation is not supported for non-binary classification. If True, theta must not be None." - }, - { - "name": "clone_kernel", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, the kernel attribute is copied. If False, the kernel attribute is modified, but may result in a performance improvement." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns log-marginal likelihood of theta for training data.\n\nIn the case of multi-class classification, the mean log-marginal\nlikelihood of the one-versus-rest classifiers are returned.\n\nParameters\n----------\ntheta : array-like of shape (n_kernel_params,), default=None\n Kernel hyperparameters for which the log-marginal likelihood is\n evaluated. In the case of multi-class classification, theta may\n be the hyperparameters of the compound kernel or of an individual\n kernel. In the latter case, all individual kernel get assigned the\n same theta values. If None, the precomputed log_marginal_likelihood\n of ``self.kernel_.theta`` is returned.\n\neval_gradient : bool, default=False\n If True, the gradient of the log-marginal likelihood with respect\n to the kernel hyperparameters at position theta is returned\n additionally. Note that gradient computation is not supported\n for non-binary classification. If True, theta must not be None.\n\nclone_kernel : bool, default=True\n If True, the kernel attribute is copied. 
If False, the kernel\n attribute is modified, but may result in a performance improvement.\n\nReturns\n-------\nlog_likelihood : float\n Log-marginal likelihood of theta for training data.\n\nlog_likelihood_gradient : ndarray of shape (n_kernel_params,), optional\n Gradient of the log-marginal likelihood with respect to the kernel\n hyperparameters at position theta.\n Only returned when `eval_gradient` is True." - } - ], - "docstring": "Gaussian process classification (GPC) based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 of\nGaussian Processes for Machine Learning (GPML) by Rasmussen and\nWilliams.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction. For multi-class classification, several binary one-versus rest\nclassifiers are fitted. Note that this class thus does not implement\na true multi-class Laplace approximation.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nkernel : kernel instance, default=None\n The kernel specifying the covariance function of the GP. If None is\n passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\n the kernel's hyperparameters are optimized during fitting.\n\noptimizer : 'fmin_l_bfgs_b' or callable, default='fmin_l_bfgs_b'\n Can either be one of the internally supported optimizers for optimizing\n the kernel's parameters, specified by a string, or an externally\n defined optimizer passed as a callable. 
If a callable is passed, it\n must have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func' is the objective function to be maximized, which\n # takes the hyperparameters theta as parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\n Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n is used. If None is passed, the kernel's parameters are kept fixed.\n Available internal optimizers are::\n\n 'fmin_l_bfgs_b'\n\nn_restarts_optimizer : int, default=0\n The number of restarts of the optimizer for finding the kernel's\n parameters which maximize the log-marginal likelihood. The first run\n of the optimizer is performed from the kernel's initial parameters,\n the remaining ones (if any) from thetas sampled log-uniform randomly\n from the space of allowed theta-values. If greater than 0, all bounds\n must be finite. Note that n_restarts_optimizer=0 implies that one\n run is performed.\n\nmax_iter_predict : int, default=100\n The maximum number of iterations in Newton's method for approximating\n the posterior during predict. Smaller values will reduce computation\n time at the cost of worse results.\n\nwarm_start : bool, default=False\n If warm-starts are enabled, the solution of the last Newton iteration\n on the Laplace approximation of the posterior mode is used as\n initialization for the next call of _posterior_mode(). This can speed\n up convergence when _posterior_mode is called several times on similar\n problems as in hyperparameter optimization. 
See :term:`the Glossary\n `.\n\ncopy_X_train : bool, default=True\n If True, a persistent copy of the training data is stored in the\n object. Otherwise, just a reference to the training data is stored,\n which might cause predictions to change if the data is modified\n externally.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nmulti_class : {'one_vs_rest', 'one_vs_one'}, default='one_vs_rest'\n Specifies how multi-class classification problems are handled.\n Supported are 'one_vs_rest' and 'one_vs_one'. In 'one_vs_rest',\n one binary Gaussian process classifier is fitted for each class, which\n is trained to separate this class from the rest. In 'one_vs_one', one\n binary Gaussian process classifier is fitted for each pair of classes,\n which is trained to separate these two classes. The predictions of\n these binary predictors are combined into multi-class predictions.\n Note that 'one_vs_one' does not support predicting probability\n estimates.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation: the specified\n multiclass problems are computed in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nbase_estimator_ : ``Estimator`` instance\n The estimator instance that defines the likelihood function\n using the observed data.\n\nkernel_ : kernel instance\n The kernel used for prediction. In case of binary classification,\n the structure of the kernel is the same as the one passed as parameter\n but with optimized hyperparameters. 
In case of multi-class\n classification, a CompoundKernel is returned which consists of the\n different kernels used in the one-versus-rest classifiers.\n\nlog_marginal_likelihood_value_ : float\n The log-marginal-likelihood of ``self.kernel_.theta``\n\nclasses_ : array-like of shape (n_classes,)\n Unique class labels.\n\nn_classes_ : int\n The number of classes in the training data\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import RBF\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = 1.0 * RBF(1.0)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9866...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.83548752, 0.03228706, 0.13222543],\n [0.79064206, 0.06525643, 0.14410151]])\n\n.. versionadded:: 0.18" - } - ], - "functions": [] - }, - { - "name": "sklearn.gaussian_process._gpr", - "imports": [ - "import warnings", - "from operator import itemgetter", - "import numpy as np", - "from scipy.linalg import cholesky", - "from scipy.linalg import cho_solve", - "from scipy.linalg import solve_triangular", - "import scipy.optimize", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from base import clone", - "from base import MultiOutputMixin", - "from kernels import RBF", - "from kernels import ConstantKernel as C", - "from utils import check_random_state", - "from utils.validation import check_array", - "from utils.optimize import _check_optimize_result", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "GaussianProcessRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The kernel specifying the 
covariance function of the GP. If None is passed, the kernel ``ConstantKernel(1.0, constant_value_bounds=\"fixed\" * RBF(1.0, length_scale_bounds=\"fixed\")`` is used as default. Note that the kernel hyperparameters are optimized during fitting unless the bounds are marked as \"fixed\"." - }, - { - "name": "alpha", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "1e-10", - "limitation": null, - "ignored": false, - "docstring": "Value added to the diagonal of the kernel matrix during fitting. This can prevent a potential numerical issue during fitting, by ensuring that the calculated values form a positive definite matrix. It can also be interpreted as the variance of additional Gaussian measurement noise on the training observations. Note that this is different from using a `WhiteKernel`. If an array is passed, it must have the same number of entries as the data used for fitting and is used as datapoint-dependent noise level. Allowing to specify the noise level directly as a parameter is mainly for convenience and for consistency with Ridge." - }, - { - "name": "optimizer", - "type": "Union[Callable, Literal[\"fmin_l_bfgs_b\"]]", - "hasDefault": true, - "default": "\"fmin_l_bfgs_b\"", - "limitation": null, - "ignored": false, - "docstring": "Can either be one of the internally supported optimizers for optimizing the kernel's parameters, specified by a string, or an externally defined optimizer passed as a callable. If a callable is passed, it must have the signature:: def optimizer(obj_func, initial_theta, bounds): # * 'obj_func' is the objective function to be minimized, which # takes the hyperparameters theta as parameter and an # optional flag eval_gradient, which determines if the # gradient is returned additionally to the function value # * 'initial_theta': the initial value for theta, which can be # used by local optimizers # * 'bounds': the bounds on the values of theta .... 
# Returned are the best found hyperparameters theta and # the corresponding value of the target function. return theta_opt, func_min Per default, the 'L-BGFS-B' algorithm from scipy.optimize.minimize is used. If None is passed, the kernel's parameters are kept fixed. Available internal optimizers are:: 'fmin_l_bfgs_b'" - }, - { - "name": "n_restarts_optimizer", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The number of restarts of the optimizer for finding the kernel's parameters which maximize the log-marginal likelihood. The first run of the optimizer is performed from the kernel's initial parameters, the remaining ones (if any) from thetas sampled log-uniform randomly from the space of allowed theta-values. If greater than 0, all bounds must be finite. Note that n_restarts_optimizer == 0 implies that one run is performed." - }, - { - "name": "normalize_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether the target values y are normalized, the mean and variance of the target values are set equal to 0 and 1 respectively. This is recommended for cases where zero-mean, unit-variance priors are used. Note that, in this implementation, the normalisation is reversed before the GP predictions are reported. .. versionchanged:: 0.23" - }, - { - "name": "copy_X_train", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, a persistent copy of the training data is stored in the object. Otherwise, just a reference to the training data is stored, which might cause predictions to change if the data is modified externally." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation used to initialize the centers. 
Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Feature vectors or other representations of training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Gaussian process regression model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Query points where the GP is evaluated." - }, - { - "name": "return_std", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the standard-deviation of the predictive distribution at the query points is returned along with the mean." - }, - { - "name": "return_cov", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the covariance of the joint predictive distribution at the query points is returned along with the mean." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the Gaussian process regression model\n\nWe can also predict based on an unfitted model by using the GP prior.\nIn addition to the mean of the predictive distribution, also its\nstandard deviation (return_std=True) or covariance (return_cov=True).\nNote that at most one of the two can be requested.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated.\n\nreturn_std : bool, default=False\n If True, the standard-deviation of the predictive distribution at\n the query points is returned along with the mean.\n\nreturn_cov : bool, default=False\n If True, the covariance of the joint predictive distribution at\n the query points is returned along with the mean.\n\nReturns\n-------\ny_mean : ndarray of shape (n_samples, [n_output_dims])\n Mean of predictive distribution a query points.\n\ny_std : ndarray of shape (n_samples,), optional\n Standard deviation of predictive distribution at query points.\n Only returned when `return_std` is True.\n\ny_cov : ndarray of shape (n_samples, n_samples), optional\n Covariance of joint predictive distribution a query points.\n Only returned when `return_cov` is True." - }, - { - "name": "sample_y", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Query points where the GP is evaluated." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of samples drawn from the Gaussian process" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation to randomly draw samples. 
Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Draw samples from Gaussian process and evaluate at X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated.\n\nn_samples : int, default=1\n The number of samples drawn from the Gaussian process\n\nrandom_state : int, RandomState instance or None, default=0\n Determines random number generation to randomly draw samples.\n Pass an int for reproducible results across multiple function\n calls.\n See :term: `Glossary `.\n\nReturns\n-------\ny_samples : ndarray of shape (n_samples_X, [n_output_dims], n_samples)\n Values of n_samples samples drawn from Gaussian process and\n evaluated at query points." - }, - { - "name": "log_marginal_likelihood", - "decorators": [], - "parameters": [ - { - "name": "theta", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Kernel hyperparameters for which the log-marginal likelihood is evaluated. If None, the precomputed log_marginal_likelihood of ``self.kernel_.theta`` is returned." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the gradient of the log-marginal likelihood with respect to the kernel hyperparameters at position theta is returned additionally. If True, theta must not be None." - }, - { - "name": "clone_kernel", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, the kernel attribute is copied. If False, the kernel attribute is modified, but may result in a performance improvement." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns log-marginal likelihood of theta for training data.\n\nParameters\n----------\ntheta : array-like of shape (n_kernel_params,) default=None\n Kernel hyperparameters for which the log-marginal likelihood is\n evaluated. If None, the precomputed log_marginal_likelihood\n of ``self.kernel_.theta`` is returned.\n\neval_gradient : bool, default=False\n If True, the gradient of the log-marginal likelihood with respect\n to the kernel hyperparameters at position theta is returned\n additionally. If True, theta must not be None.\n\nclone_kernel : bool, default=True\n If True, the kernel attribute is copied. If False, the kernel\n attribute is modified, but may result in a performance improvement.\n\nReturns\n-------\nlog_likelihood : float\n Log-marginal likelihood of theta for training data.\n\nlog_likelihood_gradient : ndarray of shape (n_kernel_params,), optional\n Gradient of the log-marginal likelihood with respect to the kernel\n hyperparameters at position theta.\n Only returned when eval_gradient is True." 
- }, - { - "name": "_constrained_optimization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Gaussian process regression (GPR).\n\nThe implementation is based on Algorithm 2.1 of Gaussian Processes\nfor Machine Learning (GPML) by Rasmussen and Williams.\n\nIn addition to standard scikit-learn estimator API,\nGaussianProcessRegressor:\n\n * allows prediction without prior fitting (based on the GP prior)\n * provides an additional method sample_y(X), which evaluates samples\n drawn from the GPR (prior or posterior) at given inputs\n * exposes a method log_marginal_likelihood(theta), which can be used\n externally for other ways of selecting hyperparameters, e.g., via\n Markov chain Monte Carlo.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernel : kernel instance, default=None\n The kernel specifying the covariance function of the GP. If None is\n passed, the kernel ``ConstantKernel(1.0, constant_value_bounds=\"fixed\"\n * RBF(1.0, length_scale_bounds=\"fixed\")`` is used as default. Note that\n the kernel hyperparameters are optimized during fitting unless the\n bounds are marked as \"fixed\".\n\nalpha : float or ndarray of shape (n_samples,), default=1e-10\n Value added to the diagonal of the kernel matrix during fitting.\n This can prevent a potential numerical issue during fitting, by\n ensuring that the calculated values form a positive definite matrix.\n It can also be interpreted as the variance of additional Gaussian\n measurement noise on the training observations. Note that this is\n different from using a `WhiteKernel`. If an array is passed, it must\n have the same number of entries as the data used for fitting and is\n used as datapoint-dependent noise level. 
Allowing to specify the\n noise level directly as a parameter is mainly for convenience and\n for consistency with Ridge.\n\noptimizer : \"fmin_l_bfgs_b\" or callable, default=\"fmin_l_bfgs_b\"\n Can either be one of the internally supported optimizers for optimizing\n the kernel's parameters, specified by a string, or an externally\n defined optimizer passed as a callable. If a callable is passed, it\n must have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func' is the objective function to be minimized, which\n # takes the hyperparameters theta as parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\n Per default, the 'L-BGFS-B' algorithm from scipy.optimize.minimize\n is used. If None is passed, the kernel's parameters are kept fixed.\n Available internal optimizers are::\n\n 'fmin_l_bfgs_b'\n\nn_restarts_optimizer : int, default=0\n The number of restarts of the optimizer for finding the kernel's\n parameters which maximize the log-marginal likelihood. The first run\n of the optimizer is performed from the kernel's initial parameters,\n the remaining ones (if any) from thetas sampled log-uniform randomly\n from the space of allowed theta-values. If greater than 0, all bounds\n must be finite. Note that n_restarts_optimizer == 0 implies that one\n run is performed.\n\nnormalize_y : bool, default=False\n Whether the target values y are normalized, the mean and variance of\n the target values are set equal to 0 and 1 respectively. 
This is\n recommended for cases where zero-mean, unit-variance priors are used.\n Note that, in this implementation, the normalisation is reversed\n before the GP predictions are reported.\n\n .. versionchanged:: 0.23\n\ncopy_X_train : bool, default=True\n If True, a persistent copy of the training data is stored in the\n object. Otherwise, just a reference to the training data is stored,\n which might cause predictions to change if the data is modified\n externally.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nAttributes\n----------\nX_train_ : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data (also\n required for prediction).\n\ny_train_ : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values in training data (also required for prediction)\n\nkernel_ : kernel instance\n The kernel used for prediction. The structure of the kernel is the\n same as the one passed as parameter but with optimized hyperparameters\n\nL_ : array-like of shape (n_samples, n_samples)\n Lower-triangular Cholesky decomposition of the kernel in ``X_train_``\n\nalpha_ : array-like of shape (n_samples,)\n Dual coefficients of training data points in kernel space\n\nlog_marginal_likelihood_value_ : float\n The log-marginal-likelihood of ``self.kernel_.theta``\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = DotProduct() + WhiteKernel()\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... 
random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3680...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([653.0..., 592.1...]), array([316.6..., 316.6...]))" - } - ], - "functions": [] - }, - { - "name": "sklearn.gaussian_process", - "imports": [ - "from _gpr import GaussianProcessRegressor", - "from _gpc import GaussianProcessClassifier", - "from None import kernels" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.gaussian_process.tests.test_gpc", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy.optimize import approx_fprime", - "import pytest", - "from sklearn.gaussian_process import GaussianProcessClassifier", - "from sklearn.gaussian_process.kernels import RBF", - "from sklearn.gaussian_process.kernels import ConstantKernel as C", - "from sklearn.gaussian_process.kernels import WhiteKernel", - "from sklearn.gaussian_process.tests._mini_sequence_kernel import MiniSeqKernel", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_warns_message" - ], - "classes": [], - "functions": [ - { - "name": "f", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_consistent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_consistent_structured", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lml_improving", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lml_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_lml_without_cloning_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_converged_to_local_maximum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lml_gradient", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_starts", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_custom_optimizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_class_n_jobs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warning_bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.gaussian_process.tests.test_gpr", - "imports": [ - "import sys", - "import numpy as np", - "import warnings", - "from scipy.optimize import approx_fprime", - "import pytest", - "from sklearn.gaussian_process import GaussianProcessRegressor", - "from sklearn.gaussian_process.kernels import RBF", - "from sklearn.gaussian_process.kernels import ConstantKernel as C", - "from sklearn.gaussian_process.kernels import WhiteKernel", - "from sklearn.gaussian_process.kernels import DotProduct", - "from sklearn.gaussian_process.kernels import ExpSineSquared", - "from sklearn.gaussian_process.tests._mini_sequence_kernel import MiniSeqKernel", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.utils._testing import assert_array_less", 
- "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_warns_message" - ], - "classes": [], - "functions": [ - { - "name": "f", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gpr_interpolation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gpr_interpolation_structured", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lml_improving", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lml_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lml_without_cloning_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_converged_to_local_maximum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_solution_inside_bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lml_gradient", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_statistics", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_optimizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_cov_vs_std", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_anisotropic_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_starts", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_y_normalization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test normalization of the target values in GP\n\nFitting non-normalizing GP on normalized y and fitting normalizing GP\non unnormalized y should yield identical results. Note that, here,\n'normalized y' refers to y that has been made zero mean and unit\nvariance." - }, - { - "name": "test_large_variance_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Here we test that, when noramlize_y=True, our GP can produce a\nsensible fit to training data whose variance is significantly\nlarger than unity. This test was made in response to issue #15612.\n\nGP predictions are verified against predictions that were made\nusing GPy which, here, is treated as the 'gold standard'. 
Note that we\nonly investigate the RBF kernel here, as that is what was used in the\nGPy implementation.\n\nThe following code can be used to recreate the GPy data:\n\n--------------------------------------------------------------------------\nimport GPy\n\nkernel_gpy = GPy.kern.RBF(input_dim=1, lengthscale=1.)\ngpy = GPy.models.GPRegression(X, np.vstack(y_large), kernel_gpy)\ngpy.optimize()\ny_pred_gpy, y_var_gpy = gpy.predict(X2)\ny_pred_std_gpy = np.sqrt(y_var_gpy)\n--------------------------------------------------------------------------" - }, - { - "name": "test_y_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_custom_optimizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gpr_correct_error_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_duplicate_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_fit_default_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_K_inv_reset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warning_bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bound_check_fixed_hyperparameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.gaussian_process.tests.test_kernels", - "imports": [ - "import pytest", - "import numpy as np", - "from inspect import signature", - "from 
sklearn.gaussian_process.kernels import _approx_fprime", - "from sklearn.metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS", - "from sklearn.metrics.pairwise import euclidean_distances", - "from sklearn.metrics.pairwise import pairwise_kernels", - "from sklearn.gaussian_process.kernels import RBF", - "from sklearn.gaussian_process.kernels import Matern", - "from sklearn.gaussian_process.kernels import RationalQuadratic", - "from sklearn.gaussian_process.kernels import ExpSineSquared", - "from sklearn.gaussian_process.kernels import DotProduct", - "from sklearn.gaussian_process.kernels import ConstantKernel", - "from sklearn.gaussian_process.kernels import WhiteKernel", - "from sklearn.gaussian_process.kernels import PairwiseKernel", - "from sklearn.gaussian_process.kernels import KernelOperator", - "from sklearn.gaussian_process.kernels import Exponentiation", - "from sklearn.gaussian_process.kernels import CompoundKernel", - "from sklearn.base import clone", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_raise_message" - ], - "classes": [], - "functions": [ - { - "name": "test_kernel_gradient", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_theta", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_auto_vs_cross", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_diag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_operator_commutative", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_anisotropic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_stationary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_input_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compound_kernel_input_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_hyperparameters_equal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_clone", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_clone_after_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_matern_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_versus_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_repr_kernels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rational_quadratic_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": 
"sklearn.gaussian_process.tests._mini_sequence_kernel", - "imports": [ - "from sklearn.gaussian_process.kernels import Kernel", - "from sklearn.gaussian_process.kernels import Hyperparameter", - "from sklearn.gaussian_process.kernels import GenericKernelMixin", - "from sklearn.gaussian_process.kernels import StationaryKernelMixin", - "import numpy as np", - "from sklearn.base import clone" - ], - "classes": [ - { - "name": "MiniSeqKernel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_baseline_similarity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_f", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_g", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "diag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "clone_with_theta", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A minimal (but valid) convolutional kernel for sequences of variable\nlength." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.gaussian_process.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.impute._base", - "imports": [ - "import numbers", - "import warnings", - "from collections import Counter", - "import numpy as np", - "import numpy.ma as ma", - "from scipy import sparse as sp", - "from scipy import stats", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils.sparsefuncs import _get_median", - "from utils.validation import check_is_fitted", - "from utils.validation import FLOAT_DTYPES", - "from utils.validation import _deprecate_positional_args", - "from utils._mask import _get_mask", - "from utils import is_scalar_nan" - ], - "classes": [ - { - "name": "_BaseImputer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_indicator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit a MissingIndicator." - }, - { - "name": "_transform_indicator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the indicator mask.'\n\nNote that X must be the original data as passed to the imputer before\nany imputation, since imputation may be done inplace in some cases." - }, - { - "name": "_concatenate_indicator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Concatenate indicator mask with the imputed data." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for all imputers.\n\nIt adds automatically support for `add_indicator`." 
- }, - { - "name": "SimpleImputer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "missing_values", - "type": "Optional[Union[float, int, str]]", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "The placeholder for the missing values. All occurrences of `missing_values` will be imputed. For pandas' dataframes with nullable integer dtypes with missing values, `missing_values` should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`." - }, - { - "name": "strategy", - "type": "str", - "hasDefault": true, - "default": "'mean'", - "limitation": null, - "ignored": false, - "docstring": "The imputation strategy. - If \"mean\", then replace missing values using the mean along each column. Can only be used with numeric data. - If \"median\", then replace missing values using the median along each column. Can only be used with numeric data. - If \"most_frequent\", then replace missing using the most frequent value along each column. Can be used with strings or numeric data. If there is more than one such value, only the smallest is returned. - If \"constant\", then replace missing values with fill_value. Can be used with strings or numeric data. .. versionadded:: 0.20 strategy=\"constant\" for fixed value imputation." - }, - { - "name": "fill_value", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "When strategy == \"constant\", fill_value is used to replace all occurrences of missing_values. If left to the default, fill_value will be 0 when imputing numerical data and \"missing_value\" for strings or object data types." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity of the imputer." 
- }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, a copy of X will be created. If False, imputation will be done in-place whenever possible. Note that, in the following cases, a new copy will always be made, even if `copy=False`: - If X is not an array of floating values; - If X is encoded as a CSR matrix; - If add_indicator=True." - }, - { - "name": "add_indicator", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, a :class:`MissingIndicator` transform will stack onto output of the imputer's transform. This allows a predictive estimator to account for missingness despite imputation. If a feature has no missing values at fit/train time, the feature won't appear on the missing indicator even if there are missing values at transform/test time." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where ``n_samples`` is the number of samples and ``n_features`` is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the imputer on X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n\nReturns\n-------\nself : SimpleImputer" - }, - { - "name": "_sparse_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the transformer on sparse data." 
- }, - { - "name": "_dense_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the transformer on dense data." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data to complete." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Impute all missing values in X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data to complete." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The imputed data to be reverted to original data. It has to be an augmented array of imputed data and the missing indicator mask." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convert the data back to the original representation.\n\nInverts the `transform` operation performed on an array.\nThis operation can only be performed after :class:`SimpleImputer` is\ninstantiated with `add_indicator=True`.\n\nNote that ``inverse_transform`` can only invert the transform in\nfeatures that have binary indicators for missing values. If a feature\nhas no missing values at ``fit`` time, the feature won't have a binary\nindicator, and the imputation done at ``transform`` time won't be\ninverted.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features + n_features_missing_indicator)\n The imputed data to be reverted to original data. It has to be\n an augmented array of imputed data and the missing indicator mask.\n\nReturns\n-------\nX_original : ndarray of shape (n_samples, n_features)\n The original X with missing values as it was prior\n to imputation." 
- } - ], - "docstring": "Imputation transformer for completing missing values.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n `SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\n estimator which is now removed.\n\nParameters\n----------\nmissing_values : int, float, str, np.nan or None, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\nstrategy : string, default='mean'\n The imputation strategy.\n\n - If \"mean\", then replace missing values using the mean along\n each column. Can only be used with numeric data.\n - If \"median\", then replace missing values using the median along\n each column. Can only be used with numeric data.\n - If \"most_frequent\", then replace missing using the most frequent\n value along each column. Can be used with strings or numeric data.\n If there is more than one such value, only the smallest is returned.\n - If \"constant\", then replace missing values with fill_value. Can be\n used with strings or numeric data.\n\n .. versionadded:: 0.20\n strategy=\"constant\" for fixed value imputation.\n\nfill_value : string or numerical value, default=None\n When strategy == \"constant\", fill_value is used to replace all\n occurrences of missing_values.\n If left to the default, fill_value will be 0 when imputing numerical\n data and \"missing_value\" for strings or object data types.\n\nverbose : integer, default=0\n Controls the verbosity of the imputer.\n\ncopy : boolean, default=True\n If True, a copy of X will be created. If False, imputation will\n be done in-place whenever possible. 
Note that, in the following cases,\n a new copy will always be made, even if `copy=False`:\n\n - If X is not an array of floating values;\n - If X is encoded as a CSR matrix;\n - If add_indicator=True.\n\nadd_indicator : boolean, default=False\n If True, a :class:`MissingIndicator` transform will stack onto output\n of the imputer's transform. This allows a predictive estimator\n to account for missingness despite imputation. If a feature has no\n missing values at fit/train time, the feature won't appear on\n the missing indicator even if there are missing values at\n transform/test time.\n\nAttributes\n----------\nstatistics_ : array of shape (n_features,)\n The imputation fill value for each feature.\n Computing statistics can result in `np.nan` values.\n During :meth:`transform`, features corresponding to `np.nan`\n statistics will be discarded.\n\nindicator_ : :class:`~sklearn.impute.MissingIndicator`\n Indicator used to add binary indicators for missing values.\n ``None`` if add_indicator is False.\n\nSee Also\n--------\nIterativeImputer : Multivariate imputation of missing values.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.impute import SimpleImputer\n>>> imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')\n>>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\nSimpleImputer()\n>>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n>>> print(imp_mean.transform(X))\n[[ 7. 2. 3. ]\n [ 4. 3.5 6. ]\n [10. 3.5 9. ]]\n\nNotes\n-----\nColumns which only contained missing values at :meth:`fit` are discarded\nupon :meth:`transform` if strategy is not \"constant\"." 
- }, - { - "name": "MissingIndicator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "missing_values", - "type": "Optional[Union[float, int, str]]", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "The placeholder for the missing values. All occurrences of `missing_values` will be imputed. For pandas' dataframes with nullable integer dtypes with missing values, `missing_values` should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`." - }, - { - "name": "features", - "type": "Literal['missing-only', 'all']", - "hasDefault": true, - "default": "'missing-only'", - "limitation": null, - "ignored": false, - "docstring": "Whether the imputer mask should represent all or a subset of features. - If 'missing-only' (default), the imputer mask will only represent features containing missing values during fit time. - If 'all', the imputer mask will represent all features." - }, - { - "name": "sparse", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether the imputer mask format should be sparse or dense. - If 'auto' (default), the imputer mask will be of same type as input. - If True, the imputer mask will be a sparse matrix. - If False, the imputer mask will be a numpy array." - }, - { - "name": "error_on_new", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, transform will raise an error when there are features with missing values in transform that have no missing values in fit. This is applicable only when `features='missing-only'`." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_missing_features_info", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data with missing values. Note that ``X`` has been checked in ``fit`` and ``transform`` before to call this function." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the imputer mask and the indices of the features\ncontaining missing values.\n\nParameters\n----------\nX : {ndarray or sparse matrix}, shape (n_samples, n_features)\n The input data with missing values. Note that ``X`` has been\n checked in ``fit`` and ``transform`` before to call this function.\n\nReturns\n-------\nimputer_mask : {ndarray or sparse matrix}, shape (n_samples, n_features)\n The imputer mask of the original data.\n\nfeatures_with_missing : ndarray, shape (n_features_with_missing)\n The features containing missing values." - }, - { - "name": "_validate_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where ``n_samples`` is the number of samples and ``n_features`` is the number of features. If `precomputed` is True, then `X` is a mask of the input data." - }, - { - "name": "precomputed", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether the input data is a mask." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the transformer on X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n If `precomputed` is True, then `X` is a mask of the\n input data.\n\nprecomputed : bool\n Whether the input data is a mask.\n\nReturns\n-------\nimputer_mask : {ndarray or sparse matrix}, shape (n_samples, n_features)\n The imputer mask of the original data." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where ``n_samples`` is the number of samples and ``n_features`` is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the transformer on X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n\nReturns\n-------\nself : object\n Returns self." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data to complete." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate missing values indicator for X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data to complete.\n\nReturns\n-------\nXt : {ndarray or sparse matrix}, shape (n_samples, n_features) or (n_samples, n_features_with_missing)\n The missing indicator for input data. The data type of ``Xt``\n will be boolean." 
- }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data to complete." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate missing values indicator for X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data to complete.\n\nReturns\n-------\nXt : {ndarray or sparse matrix}, shape (n_samples, n_features) or (n_samples, n_features_with_missing)\n The missing indicator for input data. The data type of ``Xt``\n will be boolean." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Binary indicators for missing values.\n\nNote that this component typically should not be used in a vanilla\n:class:`Pipeline` consisting of transformers and a classifier, but rather\ncould be added using a :class:`FeatureUnion` or :class:`ColumnTransformer`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nmissing_values : int, float, string, np.nan or None, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. 
For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\nfeatures : {'missing-only', 'all'}, default='missing-only'\n Whether the imputer mask should represent all or a subset of\n features.\n\n - If 'missing-only' (default), the imputer mask will only represent\n features containing missing values during fit time.\n - If 'all', the imputer mask will represent all features.\n\nsparse : bool or 'auto', default='auto'\n Whether the imputer mask format should be sparse or dense.\n\n - If 'auto' (default), the imputer mask will be of same type as\n input.\n - If True, the imputer mask will be a sparse matrix.\n - If False, the imputer mask will be a numpy array.\n\nerror_on_new : bool, default=True\n If True, transform will raise an error when there are features with\n missing values in transform that have no missing values in fit. This is\n applicable only when `features='missing-only'`.\n\nAttributes\n----------\nfeatures_ : ndarray, shape (n_missing_features,) or (n_features,)\n The features indices which will be returned when calling ``transform``.\n They are computed during ``fit``. For ``features='all'``, it is\n to ``range(n_features)``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.impute import MissingIndicator\n>>> X1 = np.array([[np.nan, 1, 3],\n... [4, 0, np.nan],\n... [8, 1, 0]])\n>>> X2 = np.array([[5, 1, np.nan],\n... [np.nan, 2, 3],\n... 
[2, 4, 0]])\n>>> indicator = MissingIndicator()\n>>> indicator.fit(X1)\nMissingIndicator()\n>>> X2_tr = indicator.transform(X2)\n>>> X2_tr\narray([[False, True],\n [ True, False],\n [False, False]])" - } - ], - "functions": [ - { - "name": "_check_inputs_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_most_frequent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the most frequent value in a 1d array extended with\n[extra_value] * n_repeat, where extra_value is assumed to be not part\nof the array." - } - ] - }, - { - "name": "sklearn.impute._iterative", - "imports": [ - "from time import time", - "from collections import namedtuple", - "import warnings", - "from scipy import stats", - "import numpy as np", - "from base import clone", - "from exceptions import ConvergenceWarning", - "from preprocessing import normalize", - "from utils import check_array", - "from utils import check_random_state", - "from utils import _safe_indexing", - "from utils import is_scalar_nan", - "from utils.validation import FLOAT_DTYPES", - "from utils.validation import check_is_fitted", - "from utils._mask import _get_mask", - "from _base import _BaseImputer", - "from _base import SimpleImputer", - "from _base import _check_inputs_dtype", - "from linear_model import BayesianRidge" - ], - "classes": [ - { - "name": "IterativeImputer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator to use at each step of the round-robin imputation. If ``sample_posterior`` is True, the estimator must support ``return_std`` in its ``predict`` method." 
- }, - { - "name": "missing_values", - "type": "int", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "The placeholder for the missing values. All occurrences of `missing_values` will be imputed. For pandas' dataframes with nullable integer dtypes with missing values, `missing_values` should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`." - }, - { - "name": "sample_posterior", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to sample from the (Gaussian) predictive posterior of the fitted estimator for each imputation. Estimator must support ``return_std`` in its ``predict`` method if set to ``True``. Set to ``True`` if using ``IterativeImputer`` for multiple imputations." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of imputation rounds to perform before returning the imputations computed during the final round. A round is a single imputation of each feature with missing values. The stopping criterion is met once `max(abs(X_t - X_{t-1}))/max(abs(X[known_vals]))` < tol, where `X_t` is `X` at iteration `t. Note that early stopping is only applied if ``sample_posterior=False``." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Tolerance of the stopping condition." - }, - { - "name": "n_nearest_features", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of other features to use to estimate the missing values of each feature column. Nearness between features is measured using the absolute correlation coefficient between each feature pair (after initial imputation). 
To ensure coverage of features throughout the imputation process, the neighbor features are not necessarily nearest, but are drawn with probability proportional to correlation for each imputed target feature. Can provide significant speed-up when the number of features is huge. If ``None``, all features will be used." - }, - { - "name": "initial_strategy", - "type": "str", - "hasDefault": true, - "default": "'mean'", - "limitation": null, - "ignored": false, - "docstring": "Which strategy to use to initialize the missing values. Same as the ``strategy`` parameter in :class:`~sklearn.impute.SimpleImputer` Valid values: {\"mean\", \"median\", \"most_frequent\", or \"constant\"}." - }, - { - "name": "imputation_order", - "type": "str", - "hasDefault": true, - "default": "'ascending'", - "limitation": null, - "ignored": false, - "docstring": "The order in which the features will be imputed. Possible values: \"ascending\" From features with fewest missing values to most. \"descending\" From features with most missing values to fewest. \"roman\" Left to right. \"arabic\" Right to left. \"random\" A random order for each round." - }, - { - "name": "skip_complete", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If ``True`` then features with missing values during ``transform`` which did not have any missing values during ``fit`` will be imputed with the initial imputation method only. Set to ``True`` if you have many features with no missing values at both ``fit`` and ``transform`` time to save compute." - }, - { - "name": "min_value", - "type": "Union[ArrayLike, float]", - "hasDefault": true, - "default": "-np", - "limitation": null, - "ignored": false, - "docstring": "Minimum possible imputed value. Broadcast to shape (n_features,) if scalar. If array-like, expects shape (n_features,), one min value for each feature. The default is `-np.inf`. .. versionchanged:: 0.23 Added support for array-like." 
- }, - { - "name": "max_value", - "type": "Union[ArrayLike, float]", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Maximum possible imputed value. Broadcast to shape (n_features,) if scalar. If array-like, expects shape (n_features,), one max value for each feature. The default is `np.inf`. .. versionchanged:: 0.23 Added support for array-like." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity flag, controls the debug messages that are issued as functions are evaluated. The higher, the more verbose. Can be 0, 1, or 2." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator to use. Randomizes selection of estimator features if n_nearest_features is not None, the ``imputation_order`` if ``random``, and the sampling from posterior if ``sample_posterior`` is True. Use an integer for determinism. See :term:`the Glossary `." - }, - { - "name": "add_indicator", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, a :class:`MissingIndicator` transform will stack onto output of the imputer's transform. This allows a predictive estimator to account for missingness despite imputation. If a feature has no missing values at fit/train time, the feature won't appear on the missing indicator even if there are missing values at transform/test time." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_impute_one_feature", - "decorators": [], - "parameters": [ - { - "name": "X_filled", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data with the most recent imputations." 
- }, - { - "name": "mask_missing_values", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data's missing indicator matrix." - }, - { - "name": "feat_idx", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the feature currently being imputed." - }, - { - "name": "neighbor_feat_idx", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of the features to be used in imputing ``feat_idx``." - }, - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator to use at this step of the round-robin imputation. If ``sample_posterior`` is True, the estimator must support ``return_std`` in its ``predict`` method. If None, it will be cloned from self._estimator." - }, - { - "name": "fit_mode", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to fit and predict with the estimator or just predict." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Impute a single feature from the others provided.\n\nThis function predicts the missing values of one of the features using\nthe current estimates of all the other features. 
The ``estimator`` must\nsupport ``return_std=True`` in its ``predict`` method for this function\nto work.\n\nParameters\n----------\nX_filled : ndarray\n Input data with the most recent imputations.\n\nmask_missing_values : ndarray\n Input data's missing indicator matrix.\n\nfeat_idx : int\n Index of the feature currently being imputed.\n\nneighbor_feat_idx : ndarray\n Indices of the features to be used in imputing ``feat_idx``.\n\nestimator : object\n The estimator to use at this step of the round-robin imputation.\n If ``sample_posterior`` is True, the estimator must support\n ``return_std`` in its ``predict`` method.\n If None, it will be cloned from self._estimator.\n\nfit_mode : boolean, default=True\n Whether to fit and predict with the estimator or just predict.\n\nReturns\n-------\nX_filled : ndarray\n Input data with ``X_filled[missing_row_mask, feat_idx]`` updated.\n\nestimator : estimator with sklearn API\n The fitted estimator used to impute\n ``X_filled[missing_row_mask, feat_idx]``." - }, - { - "name": "_get_neighbor_feat_idx", - "decorators": [], - "parameters": [ - { - "name": "n_features", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of features in ``X``." - }, - { - "name": "feat_idx", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the feature currently being imputed." - }, - { - "name": "abs_corr_mat", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Absolute correlation matrix of ``X``. The diagonal has been zeroed out and each feature has been normalized to sum to 1. Can be None." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get a list of other features to predict ``feat_idx``.\n\nIf self.n_nearest_features is less than or equal to the total\nnumber of features, then use a probability proportional to the absolute\ncorrelation between ``feat_idx`` and each other feature to randomly\nchoose a subsample of the other features (without replacement).\n\nParameters\n----------\nn_features : int\n Number of features in ``X``.\n\nfeat_idx : int\n Index of the feature currently being imputed.\n\nabs_corr_mat : ndarray, shape (n_features, n_features)\n Absolute correlation matrix of ``X``. The diagonal has been zeroed\n out and each feature has been normalized to sum to 1. Can be None.\n\nReturns\n-------\nneighbor_feat_idx : array-like\n The features to use to impute ``feat_idx``." - }, - { - "name": "_get_ordered_idx", - "decorators": [], - "parameters": [ - { - "name": "mask_missing_values", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data's missing indicator matrix, where \"n_samples\" is the number of samples and \"n_features\" is the number of features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decide in what order we will update the features.\n\nAs a homage to the MICE R package, we will have 4 main options of\nhow to order the updates, and use a random order if anything else\nis specified.\n\nAlso, this function skips features which have no missing values.\n\nParameters\n----------\nmask_missing_values : array-like, shape (n_samples, n_features)\n Input data's missing indicator matrix, where \"n_samples\" is the\n number of samples and \"n_features\" is the number of features.\n\nReturns\n-------\nordered_idx : ndarray, shape (n_features,)\n The order in which to impute the features." 
- }, - { - "name": "_get_abs_corr_mat", - "decorators": [], - "parameters": [ - { - "name": "X_filled", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data with the most recent imputations." - }, - { - "name": "tolerance", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "``abs_corr_mat`` can have nans, which will be replaced with ``tolerance``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get absolute correlation matrix between features.\n\nParameters\n----------\nX_filled : ndarray, shape (n_samples, n_features)\n Input data with the most recent imputations.\n\ntolerance : float, default=1e-6\n ``abs_corr_mat`` can have nans, which will be replaced\n with ``tolerance``.\n\nReturns\n-------\nabs_corr_mat : ndarray, shape (n_features, n_features)\n Absolute correlation matrix of ``X`` at the beginning of the\n current round. The diagonal has been zeroed out and each feature's\n absolute correlations with all others have been normalized to sum\n to 1." - }, - { - "name": "_initial_imputation", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where \"n_samples\" is the number of samples and \"n_features\" is the number of features." - }, - { - "name": "in_fit", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether function is called in fit." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform initial imputation for input X.\n\nParameters\n----------\nX : ndarray, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\nin_fit : bool, default=False\n Whether function is called in fit.\n\nReturns\n-------\nXt : ndarray, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\nX_filled : ndarray, shape (n_samples, n_features)\n Input data with the most recent imputations.\n\nmask_missing_values : ndarray, shape (n_samples, n_features)\n Input data's missing indicator matrix, where \"n_samples\" is the\n number of samples and \"n_features\" is the number of features.\n\nX_missing_mask : ndarray, shape (n_samples, n_features)\n Input data's mask matrix indicating missing datapoints, where\n \"n_samples\" is the number of samples and \"n_features\" is the\n number of features." 
- }, - { - "name": "_validate_limit", - "decorators": [], - "parameters": [ - { - "name": "limit: scalar or array-like", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The user-specified limit (i.e, min_value or max_value)" - }, - { - "name": "limit_type: string", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "n_features: Number of features in the dataset" - }, - { - "name": "\"max\" or \"min\"", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "n_features: Number of features in the dataset" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Validate the limits (min/max) of the feature values\nConverts scalar min/max limits to vectors of shape (n_features,)\n\nParameters\n----------\nlimit: scalar or array-like\n The user-specified limit (i.e, min_value or max_value)\nlimit_type: string, \"max\" or \"min\"\n n_features: Number of features in the dataset\n\nReturns\n-------\nlimit: ndarray, shape(n_features,)\n Array of limits, one for each feature" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where \"n_samples\" is the number of samples and \"n_features\" is the number of features." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fits the imputer on X and return the transformed X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\ny : ignored.\n\nReturns\n-------\nXt : array-like, shape (n_samples, n_features)\n The imputed input data." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data to complete." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Imputes all missing values in X.\n\nNote that this is stochastic, and that if random_state is not fixed,\nrepeated calls, or permuted input, will yield different results.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data to complete.\n\nReturns\n-------\nXt : array-like, shape (n_samples, n_features)\n The imputed input data." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where \"n_samples\" is the number of samples and \"n_features\" is the number of features." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fits the imputer on X and return self.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\ny : ignored\n\nReturns\n-------\nself : object\n Returns self." - } - ], - "docstring": "Multivariate imputer that estimates each feature from all the others.\n\nA strategy for imputing missing values by modeling each feature with\nmissing values as a function of other features in a round-robin fashion.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_iterative_imputer``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_iterative_imputer # noqa\n >>> # now you can import normally from sklearn.impute\n >>> from sklearn.impute import IterativeImputer\n\nParameters\n----------\nestimator : estimator object, default=BayesianRidge()\n The estimator to use at each step of the round-robin imputation.\n If ``sample_posterior`` is True, the estimator must support\n ``return_std`` in its ``predict`` method.\n\nmissing_values : int, np.nan, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\nsample_posterior : boolean, default=False\n Whether to sample from the (Gaussian) predictive posterior of the\n fitted estimator for each imputation. 
Estimator must support\n ``return_std`` in its ``predict`` method if set to ``True``. Set to\n ``True`` if using ``IterativeImputer`` for multiple imputations.\n\nmax_iter : int, default=10\n Maximum number of imputation rounds to perform before returning the\n imputations computed during the final round. A round is a single\n imputation of each feature with missing values. The stopping criterion\n is met once `max(abs(X_t - X_{t-1}))/max(abs(X[known_vals]))` < tol,\n where `X_t` is `X` at iteration `t. Note that early stopping is only\n applied if ``sample_posterior=False``.\n\ntol : float, default=1e-3\n Tolerance of the stopping condition.\n\nn_nearest_features : int, default=None\n Number of other features to use to estimate the missing values of\n each feature column. Nearness between features is measured using\n the absolute correlation coefficient between each feature pair (after\n initial imputation). To ensure coverage of features throughout the\n imputation process, the neighbor features are not necessarily nearest,\n but are drawn with probability proportional to correlation for each\n imputed target feature. Can provide significant speed-up when the\n number of features is huge. If ``None``, all features will be used.\n\ninitial_strategy : str, default='mean'\n Which strategy to use to initialize the missing values. Same as the\n ``strategy`` parameter in :class:`~sklearn.impute.SimpleImputer`\n Valid values: {\"mean\", \"median\", \"most_frequent\", or \"constant\"}.\n\nimputation_order : str, default='ascending'\n The order in which the features will be imputed. 
Possible values:\n\n \"ascending\"\n From features with fewest missing values to most.\n \"descending\"\n From features with most missing values to fewest.\n \"roman\"\n Left to right.\n \"arabic\"\n Right to left.\n \"random\"\n A random order for each round.\n\nskip_complete : boolean, default=False\n If ``True`` then features with missing values during ``transform``\n which did not have any missing values during ``fit`` will be imputed\n with the initial imputation method only. Set to ``True`` if you have\n many features with no missing values at both ``fit`` and ``transform``\n time to save compute.\n\nmin_value : float or array-like of shape (n_features,), default=-np.inf\n Minimum possible imputed value. Broadcast to shape (n_features,) if\n scalar. If array-like, expects shape (n_features,), one min value for\n each feature. The default is `-np.inf`.\n\n .. versionchanged:: 0.23\n Added support for array-like.\n\nmax_value : float or array-like of shape (n_features,), default=np.inf\n Maximum possible imputed value. Broadcast to shape (n_features,) if\n scalar. If array-like, expects shape (n_features,), one max value for\n each feature. The default is `np.inf`.\n\n .. versionchanged:: 0.23\n Added support for array-like.\n\nverbose : int, default=0\n Verbosity flag, controls the debug messages that are issued\n as functions are evaluated. The higher, the more verbose. Can be 0, 1,\n or 2.\n\nrandom_state : int, RandomState instance or None, default=None\n The seed of the pseudo random number generator to use. Randomizes\n selection of estimator features if n_nearest_features is not None, the\n ``imputation_order`` if ``random``, and the sampling from posterior if\n ``sample_posterior`` is True. Use an integer for determinism.\n See :term:`the Glossary `.\n\nadd_indicator : boolean, default=False\n If True, a :class:`MissingIndicator` transform will stack onto output\n of the imputer's transform. 
This allows a predictive estimator\n to account for missingness despite imputation. If a feature has no\n missing values at fit/train time, the feature won't appear on\n the missing indicator even if there are missing values at\n transform/test time.\n\nAttributes\n----------\ninitial_imputer_ : object of type :class:`~sklearn.impute.SimpleImputer`\n Imputer used to initialize the missing values.\n\nimputation_sequence_ : list of tuples\n Each tuple has ``(feat_idx, neighbor_feat_idx, estimator)``, where\n ``feat_idx`` is the current feature to be imputed,\n ``neighbor_feat_idx`` is the array of other features used to impute the\n current feature, and ``estimator`` is the trained estimator used for\n the imputation. Length is ``self.n_features_with_missing_ *\n self.n_iter_``.\n\nn_iter_ : int\n Number of iteration rounds that occurred. Will be less than\n ``self.max_iter`` if early stopping criterion was reached.\n\nn_features_with_missing_ : int\n Number of features with missing values.\n\nindicator_ : :class:`~sklearn.impute.MissingIndicator`\n Indicator used to add binary indicators for missing values.\n ``None`` if add_indicator is False.\n\nrandom_state_ : RandomState instance\n RandomState instance that is generated either from a seed, the random\n number generator or by `np.random`.\n\nSee Also\n--------\nSimpleImputer : Univariate imputation of missing values.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.experimental import enable_iterative_imputer\n>>> from sklearn.impute import IterativeImputer\n>>> imp_mean = IterativeImputer(random_state=0)\n>>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\nIterativeImputer(random_state=0)\n>>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n>>> imp_mean.transform(X)\narray([[ 6.9584..., 2. , 3. ],\n [ 4. , 2.6000..., 6. ],\n [10. , 4.9999..., 9. 
]])\n\nNotes\n-----\nTo support imputation in inductive mode we store each feature's estimator\nduring the ``fit`` phase, and predict without refitting (in order) during\nthe ``transform`` phase.\n\nFeatures which contain all missing values at ``fit`` are discarded upon\n``transform``.\n\nReferences\n----------\n.. [1] `Stef van Buuren, Karin Groothuis-Oudshoorn (2011). \"mice:\n Multivariate Imputation by Chained Equations in R\". Journal of\n Statistical Software 45: 1-67.\n `_\n\n.. [2] `S. F. Buck, (1960). \"A Method of Estimation of Missing Values in\n Multivariate Data Suitable for use with an Electronic Computer\".\n Journal of the Royal Statistical Society 22(2): 302-306.\n `_" - } - ], - "functions": [] - }, - { - "name": "sklearn.impute._knn", - "imports": [ - "import numpy as np", - "from _base import _BaseImputer", - "from utils.validation import FLOAT_DTYPES", - "from metrics import pairwise_distances_chunked", - "from metrics.pairwise import _NAN_METRICS", - "from neighbors._base import _get_weights", - "from neighbors._base import _check_weights", - "from utils import is_scalar_nan", - "from utils._mask import _get_mask", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "KNNImputer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "missing_values", - "type": "Optional[Union[float, int, str]]", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "The placeholder for the missing values. All occurrences of `missing_values` will be imputed. For pandas' dataframes with nullable integer dtypes with missing values, `missing_values` should be set to np.nan, since `pd.NA` will be converted to np.nan." 
- }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of neighboring samples to use for imputation." - }, - { - "name": "weights", - "type": "Literal['uniform', 'distance']", - "hasDefault": true, - "default": "'uniform'", - "limitation": null, - "ignored": false, - "docstring": "Weight function used in prediction. Possible values: - 'uniform' : uniform weights. All points in each neighborhood are weighted equally. - 'distance' : weight points by the inverse of their distance. in this case, closer neighbors of a query point will have a greater influence than neighbors which are further away. - callable : a user-defined function which accepts an array of distances, and returns an array of the same shape containing the weights." - }, - { - "name": "metric", - "type": "Literal['nan_euclidean']", - "hasDefault": true, - "default": "'nan_euclidean'", - "limitation": null, - "ignored": false, - "docstring": "Distance metric for searching neighbors. Possible values: - 'nan_euclidean' - callable : a user-defined function which conforms to the definition of ``_pairwise_callable(X, Y, metric, **kwds)``. The function accepts two arrays, X and Y, and a `missing_values` keyword in `kwds` and returns a scalar distance value." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, a copy of X will be created. If False, imputation will be done in-place whenever possible." - }, - { - "name": "add_indicator", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, a :class:`MissingIndicator` transform will stack onto the output of the imputer's transform. This allows a predictive estimator to account for missingness despite imputation. 
If a feature has no missing values at fit/train time, the feature won't appear on the missing indicator even if there are missing values at transform/test time." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_calc_impute", - "decorators": [], - "parameters": [ - { - "name": "dist_pot_donors", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Distance matrix between the receivers and potential donors from training set. There must be at least one non-nan distance between a receiver and a potential donor." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to consider." - }, - { - "name": "fit_X_col", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Column of potential donors from training set." - }, - { - "name": "mask_fit_X_col", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Missing mask for fit_X_col." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Helper function to impute a single column.\n\nParameters\n----------\ndist_pot_donors : ndarray of shape (n_receivers, n_potential_donors)\n Distance matrix between the receivers and potential donors from\n training set. There must be at least one non-nan distance between\n a receiver and a potential donor.\n\nn_neighbors : int\n Number of neighbors to consider.\n\nfit_X_col : ndarray of shape (n_potential_donors,)\n Column of potential donors from training set.\n\nmask_fit_X_col : ndarray of shape (n_potential_donors,)\n Missing mask for fit_X_col.\n\nReturns\n-------\nimputed_values: ndarray of shape (n_receivers,)\n Imputed values for receiver." 
- }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where `n_samples` is the number of samples and `n_features` is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the imputer on X.\n\nParameters\n----------\nX : array-like shape of (n_samples, n_features)\n Input data, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\nReturns\n-------\nself : object" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data to complete." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Impute all missing values in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data to complete.\n\nReturns\n-------\nX : array-like of shape (n_samples, n_output_features)\n The imputed dataset. `n_output_features` is the number of features\n that is not always missing during `fit`." - } - ], - "docstring": "Imputation for completing missing values using k-Nearest Neighbors.\n\nEach sample's missing values are imputed using the mean value from\n`n_neighbors` nearest neighbors found in the training set. Two samples are\nclose if the features that neither is missing are close.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nmissing_values : int, float, str, np.nan or None, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. 
For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n should be set to np.nan, since `pd.NA` will be converted to np.nan.\n\nn_neighbors : int, default=5\n Number of neighboring samples to use for imputation.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n Weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. All points in each neighborhood are\n weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - callable : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\nmetric : {'nan_euclidean'} or callable, default='nan_euclidean'\n Distance metric for searching neighbors. Possible values:\n\n - 'nan_euclidean'\n - callable : a user-defined function which conforms to the definition\n of ``_pairwise_callable(X, Y, metric, **kwds)``. The function\n accepts two arrays, X and Y, and a `missing_values` keyword in\n `kwds` and returns a scalar distance value.\n\ncopy : bool, default=True\n If True, a copy of X will be created. If False, imputation will\n be done in-place whenever possible.\n\nadd_indicator : bool, default=False\n If True, a :class:`MissingIndicator` transform will stack onto the\n output of the imputer's transform. This allows a predictive estimator\n to account for missingness despite imputation. 
If a feature has no\n missing values at fit/train time, the feature won't appear on the\n missing indicator even if there are missing values at transform/test\n time.\n\nAttributes\n----------\nindicator_ : :class:`~sklearn.impute.MissingIndicator`\n Indicator used to add binary indicators for missing values.\n ``None`` if add_indicator is False.\n\nReferences\n----------\n* Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, Trevor\n Hastie, Robert Tibshirani, David Botstein and Russ B. Altman, Missing\n value estimation methods for DNA microarrays, BIOINFORMATICS Vol. 17\n no. 6, 2001 Pages 520-525.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.impute import KNNImputer\n>>> X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]\n>>> imputer = KNNImputer(n_neighbors=2)\n>>> imputer.fit_transform(X)\narray([[1. , 2. , 4. ],\n [3. , 4. , 3. ],\n [5.5, 6. , 5. ],\n [8. , 8. , 7. ]])" - } - ], - "functions": [] - }, - { - "name": "sklearn.impute", - "imports": [ - "import typing", - "from _base import MissingIndicator", - "from _base import SimpleImputer", - "from _knn import KNNImputer", - "from _iterative import IterativeImputer" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.impute.tests.test_base", - "imports": [ - "import pytest", - "import numpy as np", - "from sklearn.impute._base import _BaseImputer", - "from sklearn.utils._mask import _get_mask" - ], - "classes": [ - { - "name": "NoFitIndicatorImputer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": 
"NoTransformIndicatorImputer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoPrecomputedMaskFit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoPrecomputedMaskTransform", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_imputer_not_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_imputer_not_transform", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_no_precomputed_mask_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_no_precomputed_mask_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.impute.tests.test_common", - "imports": [ - "import pytest", - "import numpy as np", - "from scipy import sparse", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.experimental import enable_iterative_imputer", - "from sklearn.impute import IterativeImputer", - "from sklearn.impute import KNNImputer", - "from sklearn.impute import SimpleImputer" - ], - "classes": [], - "functions": [ - { - "name": "test_imputation_missing_value_in_test_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputers_add_indicator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputers_add_indicator_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputers_pandas_na_integer_array_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.impute.tests.test_impute", - "imports": [ - "from __future__ import division", - "import pytest", - "import numpy as np", - "from scipy import sparse", - "from scipy.stats import kstest", - "import io", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing 
import assert_allclose_dense_sparse", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.experimental import enable_iterative_imputer", - "from sklearn.datasets import load_diabetes", - "from sklearn.impute import MissingIndicator", - "from sklearn.impute import SimpleImputer", - "from sklearn.impute import IterativeImputer", - "from sklearn.dummy import DummyRegressor", - "from sklearn.linear_model import BayesianRidge", - "from sklearn.linear_model import ARDRegression", - "from sklearn.linear_model import RidgeCV", - "from sklearn.pipeline import Pipeline", - "from sklearn.pipeline import make_union", - "from sklearn.model_selection import GridSearchCV", - "from sklearn import tree", - "from sklearn.random_projection import _sparse_random_matrix", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.impute._base import _most_frequent" - ], - "classes": [], - "functions": [ - { - "name": "_check_statistics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Utility function for testing imputation for a given strategy.\n\nTest with dense and sparse arrays\n\nCheck that:\n - the statistics (mean, median, mode) are correct\n - the missing values are imputed correctly" - }, - { - "name": "test_imputation_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_error_invalid_strategy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_deletion_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_error_sparse_0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - 
}, - { - "name": "safe_median", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "safe_mean", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_mean_median", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_median_special_cases", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_mean_median_error_invalid_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_mean_median_error_invalid_type_list_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_const_mostf_error_invalid_types", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_most_frequent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_most_frequent_objects", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_most_frequent_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_constant_error_invalid_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_constant_integer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - 
{ - "name": "test_imputation_constant_float", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_constant_object", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_constant_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_one_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_pipeline_grid_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_copy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_zero_iters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_all_missing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_imputation_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_clip", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_iterative_imputer_clip_truncnorm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_truncated_normal_posterior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_missing_at_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_transform_stochasticity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_no_missing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_rank_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_transform_recovery", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_additive_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_error_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_catch_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_min_max_array_like", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_catch_min_max_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_min_max_array_like_imputation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_skip_non_missing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_dont_set_random_state", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_indicator_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_indicator_new", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_indicator_raise_on_sparse_with_missing_0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_indicator_sparse_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_indicator_string", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_indicator_with_imputer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inconsistent_dtype_X_missing_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_indicator_no_missing", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_indicator_sparse_no_explicit_zeros", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputer_without_indicator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_simple_imputation_add_indicator_sparse_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_simple_imputation_string_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_simple_imputation_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_simple_imputation_inverse_transform_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_most_frequent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.impute.tests.test_knn", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn import config_context", - "from sklearn.impute import KNNImputer", - "from sklearn.metrics.pairwise import nan_euclidean_distances", - "from sklearn.metrics.pairwise import pairwise_distances", - "from sklearn.neighbors import KNeighborsRegressor", - "from sklearn.utils._testing import assert_allclose" - ], - "classes": [], - "functions": [ - { - "name": "test_knn_imputer_shape", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_default_with_invalid_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_removes_all_na_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_zero_nan_imputes_the_same", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_verify", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_one_n_neighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_all_samples_are_neighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_weight_uniform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_weight_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_callable_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_with_simple_example", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_not_enough_valid_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_drops_all_nan_features", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_distance_weighted_not_enough_neighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.impute.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.inspection.setup", - "imports": [ - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.inspection._partial_dependence", - "imports": [ - "from collections.abc import Iterable", - "import warnings", - "import numpy as np", - "from scipy import sparse", - "from scipy.stats.mstats import mquantiles", - "from base import is_classifier", - "from base import is_regressor", - "from pipeline import Pipeline", - "from utils.extmath import cartesian", - "from utils import check_array", - "from utils import check_matplotlib_support", - "from utils import _safe_indexing", - "from utils import _determine_key_type", - "from utils import _get_column_indices", - "from utils.validation import check_is_fitted", - "from utils import Bunch", - "from utils.validation import _deprecate_positional_args", - "from tree import DecisionTreeRegressor", - "from ensemble import RandomForestRegressor", - "from exceptions import NotFittedError", - "from ensemble._gb import BaseGradientBoosting", - "from sklearn.ensemble._hist_gradient_boosting.gradient_boosting import BaseHistGradientBoosting" - ], - "classes": [], - "functions": [ - { - "name": "_grid_from_X", - "decorators": [], - "parameters": [ - 
{ - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - }, - { - "name": "percentiles", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The percentiles which are used to construct the extreme values of the grid. Must be in [0, 1]." - }, - { - "name": "grid_resolution", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of equally spaced points to be placed on the grid for each feature." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a grid of points based on the percentiles of X.\n\nThe grid is a cartesian product between the columns of ``values``. The\nith column of ``values`` consists in ``grid_resolution`` equally-spaced\npoints between the percentiles of the jth column of X.\nIf ``grid_resolution`` is bigger than the number of unique values in the\njth column of X, then those unique values will be used instead.\n\nParameters\n----------\nX : ndarray, shape (n_samples, n_target_features)\n The data.\n\npercentiles : tuple of floats\n The percentiles which are used to construct the extreme values of\n the grid. Must be in [0, 1].\n\ngrid_resolution : int\n The number of equally spaced points to be placed on the grid for each\n feature.\n\nReturns\n-------\ngrid : ndarray, shape (n_points, n_target_features)\n A value for each feature at each point in the grid. ``n_points`` is\n always ``<= grid_resolution ** X.shape[1]``.\n\nvalues : list of 1d ndarrays\n The values with which the grid has been created. The size of each\n array ``values[j]`` is either ``grid_resolution``, or the number of\n unique values in ``X[:, j]``, whichever is smaller." 
- }, - { - "name": "_partial_dependence_recursion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_partial_dependence_brute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_dependence", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A fitted estimator object implementing :term:`predict`, :term:`predict_proba`, or :term:`decision_function`. Multioutput-multiclass classifiers are not supported." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "``X`` is used to generate a grid of values for the target ``features`` (where the partial dependence will be evaluated), and also to generate values for the complement features when the `method` is 'brute'." - }, - { - "name": "features", - "type": "Union[str, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The feature (e.g. `[0]`) or pair of interacting features (e.g. `[(0, 1)]`) for which the partial dependency should be computed." - }, - { - "name": "response_method", - "type": "Literal['auto', 'predict_proba', 'decision_function']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. For regressors this parameter is ignored and the response is always the output of :term:`predict`. By default, :term:`predict_proba` is tried first and we revert to :term:`decision_function` if it doesn't exist. If ``method`` is 'recursion', the response is always the output of :term:`decision_function`." 
- }, - { - "name": "percentiles", - "type": "Tuple[float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper percentile used to create the extreme values for the grid. Must be in [0, 1]." - }, - { - "name": "grid_resolution", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of equally spaced points on the grid, for each target feature." - }, - { - "name": "method", - "type": "Literal['auto', 'recursion', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The method used to calculate the averaged predictions: - `'recursion'` is only supported for some tree-based estimators (namely :class:`~sklearn.ensemble.GradientBoostingClassifier`, :class:`~sklearn.ensemble.GradientBoostingRegressor`, :class:`~sklearn.ensemble.HistGradientBoostingClassifier`, :class:`~sklearn.ensemble.HistGradientBoostingRegressor`, :class:`~sklearn.tree.DecisionTreeRegressor`, :class:`~sklearn.ensemble.RandomForestRegressor`, ) when `kind='average'`. This is more efficient in terms of speed. With this method, the target response of a classifier is always the decision function, not the predicted probabilities. Since the `'recursion'` method implicitely computes the average of the Individual Conditional Expectation (ICE) by design, it is not compatible with ICE and thus `kind` must be `'average'`. - `'brute'` is supported for any estimator, but is more computationally intensive. - `'auto'`: the `'recursion'` is used for estimators that support it, and `'brute'` is used otherwise. Please see :ref:`this note ` for differences between the `'brute'` and `'recursion'` method." 
- }, - { - "name": "kind", - "type": "Literal['legacy', 'average', 'individual', 'both']", - "hasDefault": true, - "default": "'legacy'", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the partial dependence averaged across all the samples in the dataset or one line per sample or both. See Returns below. Note that the fast `method='recursion'` option is only available for `kind='average'`. Plotting individual dependencies requires using the slower `method='brute'` option. .. versionadded:: 0.24 .. deprecated:: 0.24 `kind='legacy'` is deprecated and will be removed in version 1.1. `kind='average'` will be the new default. It is intended to migrate from the ndarray output to :class:`~sklearn.utils.Bunch` output." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Partial dependence of ``features``.\n\nPartial dependence of a feature (or a set of features) corresponds to\nthe average response of an estimator for each possible value of the\nfeature.\n\nRead more in the :ref:`User Guide `.\n\n.. warning::\n\n For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n `'recursion'` method (used by default) will not account for the `init`\n predictor of the boosting process. In practice, this will produce\n the same values as `'brute'` up to a constant offset in the target\n response, provided that `init` is a constant estimator (which is the\n default). However, if `init` is not a constant estimator, the\n partial dependence values are incorrect for `'recursion'` because the\n offset will be sample-dependent. It is preferable to use the `'brute'`\n method. 
Note that this only applies to\n :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\nParameters\n----------\nestimator : BaseEstimator\n A fitted estimator object implementing :term:`predict`,\n :term:`predict_proba`, or :term:`decision_function`.\n Multioutput-multiclass classifiers are not supported.\n\nX : {array-like or dataframe} of shape (n_samples, n_features)\n ``X`` is used to generate a grid of values for the target\n ``features`` (where the partial dependence will be evaluated), and\n also to generate values for the complement features when the\n `method` is 'brute'.\n\nfeatures : array-like of {int, str}\n The feature (e.g. `[0]`) or pair of interacting features\n (e.g. `[(0, 1)]`) for which the partial dependency should be computed.\n\nresponse_method : {'auto', 'predict_proba', 'decision_function'}, default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. For regressors\n this parameter is ignored and the response is always the output of\n :term:`predict`. By default, :term:`predict_proba` is tried first\n and we revert to :term:`decision_function` if it doesn't exist. If\n ``method`` is 'recursion', the response is always the output of\n :term:`decision_function`.\n\npercentiles : tuple of float, default=(0.05, 0.95)\n The lower and upper percentile used to create the extreme values\n for the grid. 
Must be in [0, 1].\n\ngrid_resolution : int, default=100\n The number of equally spaced points on the grid, for each target\n feature.\n\nmethod : {'auto', 'recursion', 'brute'}, default='auto'\n The method used to calculate the averaged predictions:\n\n - `'recursion'` is only supported for some tree-based estimators\n (namely\n :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n :class:`~sklearn.tree.DecisionTreeRegressor`,\n :class:`~sklearn.ensemble.RandomForestRegressor`,\n ) when `kind='average'`.\n This is more efficient in terms of speed.\n With this method, the target response of a\n classifier is always the decision function, not the predicted\n probabilities. Since the `'recursion'` method implicitely computes\n the average of the Individual Conditional Expectation (ICE) by\n design, it is not compatible with ICE and thus `kind` must be\n `'average'`.\n\n - `'brute'` is supported for any estimator, but is more\n computationally intensive.\n\n - `'auto'`: the `'recursion'` is used for estimators that support it,\n and `'brute'` is used otherwise.\n\n Please see :ref:`this note ` for\n differences between the `'brute'` and `'recursion'` method.\n\nkind : {'legacy', 'average', 'individual', 'both'}, default='legacy'\n Whether to return the partial dependence averaged across all the\n samples in the dataset or one line per sample or both.\n See Returns below.\n\n Note that the fast `method='recursion'` option is only available for\n `kind='average'`. Plotting individual dependencies requires using the\n slower `method='brute'` option.\n\n .. versionadded:: 0.24\n .. deprecated:: 0.24\n `kind='legacy'` is deprecated and will be removed in version 1.1.\n `kind='average'` will be the new default. 
It is intended to migrate\n from the ndarray output to :class:`~sklearn.utils.Bunch` output.\n\n\nReturns\n-------\npredictions : ndarray or :class:`~sklearn.utils.Bunch`\n\n - if `kind='legacy'`, return value is ndarray of shape (n_outputs, len(values[0]), len(values[1]), ...)\n The predictions for all the points in the grid, averaged\n over all samples in X (or over the training data if ``method``\n is 'recursion').\n\n - if `kind='individual'`, `'average'` or `'both'`, return value is :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n individual : ndarray of shape (n_outputs, n_instances, len(values[0]), len(values[1]), ...)\n The predictions for all the points in the grid for all\n samples in X. This is also known as Individual\n Conditional Expectation (ICE)\n\n average : ndarray of shape (n_outputs, len(values[0]), len(values[1]), ...)\n The predictions for all the points in the grid, averaged\n over all samples in X (or over the training data if\n ``method`` is 'recursion').\n Only available when kind='both'.\n\n values : seq of 1d ndarrays\n The values with which the grid has been created. The generated\n grid is a cartesian product of the arrays in ``values``.\n ``len(values) == len(features)``. The size of each array\n ``values[j]`` is either ``grid_resolution``, or the number of\n unique values in ``X[:, j]``, whichever is smaller.\n\n ``n_outputs`` corresponds to the number of classes in a multi-class\n setting, or to the number of tasks for multi-output regression.\n For classical regression and binary classification ``n_outputs==1``.\n ``n_values_feature_j`` corresponds to the size ``values[j]``.\n\nvalues : seq of 1d ndarrays\n The values with which the grid has been created. The generated grid\n is a cartesian product of the arrays in ``values``. ``len(values) ==\n len(features)``. 
The size of each array ``values[j]`` is either\n ``grid_resolution``, or the number of unique values in ``X[:, j]``,\n whichever is smaller. Only available when `kind=\"legacy\"`.\n\nSee Also\n--------\nplot_partial_dependence : Plot Partial Dependence.\nPartialDependenceDisplay : Partial Dependence visualization.\n\nExamples\n--------\n>>> X = [[0, 0, 2], [1, 0, 0]]\n>>> y = [0, 1]\n>>> from sklearn.ensemble import GradientBoostingClassifier\n>>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)\n>>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),\n... grid_resolution=2) # doctest: +SKIP\n(array([[-4.52..., 4.52...]]), [array([ 0., 1.])])" - } - ] - }, - { - "name": "sklearn.inspection._permutation_importance", - "imports": [ - "import numpy as np", - "from joblib import Parallel", - "from metrics import check_scoring", - "from utils import Bunch", - "from utils import check_random_state", - "from utils import check_array", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed" - ], - "classes": [], - "functions": [ - { - "name": "_weights_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_calculate_permutation_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate score when `col_idx` is permuted." - }, - { - "name": "permutation_importance", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator that has already been :term:`fitted` and is compatible with :term:`scorer`." - }, - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data on which permutation importance will be computed." 
- }, - { - "name": "y", - "type": "Optional[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets for supervised or `None` for unsupervised." - }, - { - "name": "scoring", - "type": "Optional[Union[Callable, str]]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Scorer to use. It can be a single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`). If None, the estimator's default scorer is used." - }, - { - "name": "n_repeats", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of times to permute a feature." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. The computation is done by computing permutation score for each columns and parallelized over the columns. `None` means 1 unless in a :obj:`joblib.parallel_backend` context. `-1` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the permutations of each feature. Pass an int to get reproducible results across function calls. See :term: `Glossary `." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights used in scoring. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Permutation importance for feature evaluation [BRE]_.\n\nThe :term:`estimator` is required to be a fitted estimator. `X` can be the\ndata set used to train the estimator or a hold-out set. 
The permutation\nimportance of a feature is calculated as follows. First, a baseline metric,\ndefined by :term:`scoring`, is evaluated on a (potentially different)\ndataset defined by the `X`. Next, a feature column from the validation set\nis permuted and the metric is evaluated again. The permutation importance\nis defined to be the difference between the baseline metric and metric from\npermutating the feature column.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : object\n An estimator that has already been :term:`fitted` and is compatible\n with :term:`scorer`.\n\nX : ndarray or DataFrame, shape (n_samples, n_features)\n Data on which permutation importance will be computed.\n\ny : array-like or None, shape (n_samples, ) or (n_samples, n_classes)\n Targets for supervised or `None` for unsupervised.\n\nscoring : string, callable or None, default=None\n Scorer to use. It can be a single\n string (see :ref:`scoring_parameter`) or a callable (see\n :ref:`scoring`). If None, the estimator's default scorer is used.\n\nn_repeats : int, default=5\n Number of times to permute a feature.\n\nn_jobs : int or None, default=None\n Number of jobs to run in parallel. The computation is done by computing\n permutation score for each columns and parallelized over the columns.\n `None` means 1 unless in a :obj:`joblib.parallel_backend` context.\n `-1` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n Pseudo-random number generator to control the permutations of each\n feature.\n Pass an int to get reproducible results across function calls.\n See :term: `Glossary `.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights used in scoring.\n\n .. 
versionadded:: 0.24\n\nReturns\n-------\nresult : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n importances_mean : ndarray, shape (n_features, )\n Mean of feature importance over `n_repeats`.\n importances_std : ndarray, shape (n_features, )\n Standard deviation over `n_repeats`.\n importances : ndarray, shape (n_features, n_repeats)\n Raw permutation importance scores.\n\nReferences\n----------\n.. [BRE] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32,\n 2001. https://doi.org/10.1023/A:1010933404324\n\nExamples\n--------\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.inspection import permutation_importance\n>>> X = [[1, 9, 9],[1, 9, 9],[1, 9, 9],\n... [0, 9, 9],[0, 9, 9],[0, 9, 9]]\n>>> y = [1, 1, 1, 0, 0, 0]\n>>> clf = LogisticRegression().fit(X, y)\n>>> result = permutation_importance(clf, X, y, n_repeats=10,\n... random_state=0)\n>>> result.importances_mean\narray([0.4666..., 0. , 0. ])\n>>> result.importances_std\narray([0.2211..., 0. , 0. 
])" - } - ] - }, - { - "name": "sklearn.inspection", - "imports": [ - "from _permutation_importance import permutation_importance", - "from _partial_dependence import partial_dependence", - "from _plot.partial_dependence import plot_partial_dependence", - "from _plot.partial_dependence import PartialDependenceDisplay" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.inspection.tests.test_partial_dependence", - "imports": [ - "import numpy as np", - "import pytest", - "import sklearn", - "from sklearn.inspection import partial_dependence", - "from sklearn.inspection._partial_dependence import _grid_from_X", - "from sklearn.inspection._partial_dependence import _partial_dependence_brute", - "from sklearn.inspection._partial_dependence import _partial_dependence_recursion", - "from sklearn.ensemble import GradientBoostingClassifier", - "from sklearn.ensemble import GradientBoostingRegressor", - "from sklearn.ensemble import RandomForestRegressor", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from sklearn.ensemble import HistGradientBoostingClassifier", - "from sklearn.ensemble import HistGradientBoostingRegressor", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import MultiTaskLasso", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.cluster import KMeans", - "from sklearn.compose import make_column_transformer", - "from sklearn.metrics import r2_score", - "from sklearn.preprocessing import PolynomialFeatures", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.preprocessing import RobustScaler", - "from sklearn.pipeline import make_pipeline", - "from sklearn.dummy import DummyClassifier", - "from sklearn.base import BaseEstimator", - "from 
sklearn.base import ClassifierMixin", - "from sklearn.base import clone", - "from sklearn.exceptions import NotFittedError", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils import _IS_32BIT", - "from sklearn.utils.validation import check_random_state", - "from sklearn.tree.tests.test_tree import assert_is_subtree" - ], - "classes": [ - { - "name": "NoPredictProbaNoDecisionFunction", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_output_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_from_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_from_X_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_helpers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_recursion_decision_tree_vs_forest_and_gbdt", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_recursion_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_easy_target", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_multiclass_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_slice_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_unknown_feature_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_unknown_feature_string", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_X_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warning_recursion_non_constant_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hist_gbdt_sw_not_supported", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_feature_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "test_partial_dependence_unfitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kind_average_and_average_of_individual", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warning_for_kind_legacy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.inspection.tests.test_permutation_importance", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_allclose", - "from sklearn.compose import ColumnTransformer", - "from sklearn.datasets import load_diabetes", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.dummy import DummyClassifier", - "from sklearn.ensemble import RandomForestRegressor", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.impute import SimpleImputer", - "from sklearn.inspection import permutation_importance", - "from sklearn.model_selection import train_test_split", - "from sklearn.pipeline import make_pipeline", - "from sklearn.preprocessing import KBinsDiscretizer", - "from sklearn.preprocessing import OneHotEncoder", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.preprocessing import scale", - "from sklearn.utils import parallel_backend", - "from sklearn.utils._testing import _convert_container" - ], - "classes": [], - "functions": [ - { - "name": "test_permutation_importance_correlated_feature_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_permutation_importance_correlated_feature_regression_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robustness_to_high_cardinality_noisy_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_importance_mixed_types", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_importance_mixed_types_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_importance_linear_regresssion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_importance_equivalence_sequential_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_importance_equivalence_array_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_importance_large_memmaped_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_importance_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_importance_no_weights_scoring_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.inspection.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.inspection._plot.partial_dependence", - "imports": [ - "import 
numbers", - "from itertools import chain", - "from math import ceil", - "import numpy as np", - "from scipy import sparse", - "from scipy.stats.mstats import mquantiles", - "from joblib import Parallel", - "from None import partial_dependence", - "from base import is_regressor", - "from utils import check_array", - "from utils import check_matplotlib_support", - "from utils import check_random_state", - "from utils import _safe_indexing", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "import matplotlib.pyplot as plt", - "from matplotlib import transforms", - "from matplotlib.gridspec import GridSpecFromSubplotSpec" - ], - "classes": [ - { - "name": "PartialDependenceDisplay", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "pd_results", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Results of :func:`~sklearn.inspection.partial_dependence` for ``features``." - }, - { - "name": "features", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of features for a given plot. A tuple of one integer will plot a partial dependence curve of one feature. A tuple of two integers will plot a two-way partial dependence curve as a contour plot." - }, - { - "name": "feature_names", - "type": "List[str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Feature names corresponding to the indices in ``features``." - }, - { - "name": "target_idx", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "- In a multiclass setting, specifies the class for which the PDPs should be computed. Note that for binary classification, the positive class (index 1) is always used. 
- In a multioutput setting, specifies the task for which the PDPs should be computed. Ignored in binary classification or classical regression settings." - }, - { - "name": "pdp_lim", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Global min and max average predictions, such that all plots will have the same scale and y limits. `pdp_lim[1]` is the global min and max for single partial dependence curves. `pdp_lim[2]` is the global min and max for two-way partial dependence curves." - }, - { - "name": "deciles", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Deciles for feature indices in ``features``." - }, - { - "name": "kind", - "type": "Literal['average', 'individual', 'both']", - "hasDefault": true, - "default": "'average'", - "limitation": null, - "ignored": false, - "docstring": " Whether to plot the partial dependence averaged across all the samples in the dataset or one line per sample or both. - ``kind='average'`` results in the traditional PD plot; - ``kind='individual'`` results in the ICE plot. Note that the fast ``method='recursion'`` option is only available for ``kind='average'``. Plotting individual dependencies requires using the slower ``method='brute'`` option. .. versionadded:: 0.24" - }, - { - "name": "subsample", - "type": "Optional[Union[int, float]]", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Sampling for ICE curves when `kind` is 'individual' or 'both'. If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to be used to plot ICE curves. If int, represents the maximum absolute number of samples to use. Note that the full dataset is still used to calculate partial dependence when `kind='both'`. .. 
versionadded:: 0.24" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of the selected samples when subsamples is not `None`. See :term:`Glossary ` for details. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_sample_count", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the number of samples as an integer." - }, - { - "name": "_plot_ice_lines", - "decorators": [], - "parameters": [ - { - "name": "preds", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The predictions computed for all points of `feature_values` for a given feature for all samples in `X`." - }, - { - "name": "feature_values", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The feature values for which the predictions have been computed." - }, - { - "name": "n_ice_to_plot", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of ICE lines to plot." - }, - { - "name": "ax", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The axis on which to plot the ICE lines." - }, - { - "name": "pd_plot_idx", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sequential index of the plot. It will be unraveled to find the matching 2D position in the grid layout." 
- }, - { - "name": "n_total_lines_by_plot", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The total number of lines expected to be plot on the axis." - }, - { - "name": "individual_line_kw", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed when plotting the ICE lines." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Plot the ICE lines.\n\nParameters\n----------\npreds : ndarray of shape (n_instances, n_grid_points)\n The predictions computed for all points of `feature_values` for a\n given feature for all samples in `X`.\nfeature_values : ndarray of shape (n_grid_points,)\n The feature values for which the predictions have been computed.\nn_ice_to_plot : int\n The number of ICE lines to plot.\nax : Matplotlib axes\n The axis on which to plot the ICE lines.\npd_plot_idx : int\n The sequential index of the plot. It will be unraveled to find the\n matching 2D position in the grid layout.\nn_total_lines_by_plot : int\n The total number of lines expected to be plot on the axis.\nindividual_line_kw : dict\n Dict with keywords passed when plotting the ICE lines." - }, - { - "name": "_plot_average_dependence", - "decorators": [], - "parameters": [ - { - "name": "avg_preds", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The average predictions for all points of `feature_values` for a given feature for all samples in `X`." - }, - { - "name": "feature_values", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The feature values for which the predictions have been computed." 
- }, - { - "name": "ax", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The axis on which to plot the ICE lines." - }, - { - "name": "pd_line_idx", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sequential index of the plot. It will be unraveled to find the matching 2D position in the grid layout." - }, - { - "name": "line_kw", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed when plotting the PD plot." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Plot the average partial dependence.\n\nParameters\n----------\navg_preds : ndarray of shape (n_grid_points,)\n The average predictions for all points of `feature_values` for a\n given feature for all samples in `X`.\nfeature_values : ndarray of shape (n_grid_points,)\n The feature values for which the predictions have been computed.\nax : Matplotlib axes\n The axis on which to plot the ICE lines.\npd_line_idx : int\n The sequential index of the plot. It will be unraveled to find the\n matching 2D position in the grid layout.\nline_kw : dict\n Dict with keywords passed when plotting the PD plot." - }, - { - "name": "_plot_one_way_partial_dependence", - "decorators": [], - "parameters": [ - { - "name": "preds", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The predictions computed for all points of `feature_values` for a given feature for all samples in `X`." - }, - { - "name": "avg_preds", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The average predictions for all points of `feature_values` for a given feature for all samples in `X`." 
- }, - { - "name": "feature_values", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The feature values for which the predictions have been computed." - }, - { - "name": "feature_idx", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index corresponding to the target feature." - }, - { - "name": "n_ice_lines", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of ICE lines to plot." - }, - { - "name": "ax", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The axis on which to plot the ICE and PDP lines." - }, - { - "name": "n_cols", - "type": "Optional[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of column in the axis." - }, - { - "name": "pd_plot_idx", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sequential index of the plot. It will be unraveled to find the matching 2D position in the grid layout." - }, - { - "name": "n_lines", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The total number of lines expected to be plot on the axis." - }, - { - "name": "individual_line_kw", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed when plotting the ICE lines." - }, - { - "name": "line_kw", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed when plotting the PD plot." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Plot 1-way partial dependence: ICE and PDP.\n\nParameters\n----------\npreds : ndarray of shape (n_instances, n_grid_points) or None\n The predictions computed for all points of `feature_values` for a\n given feature for all samples in `X`.\navg_preds : ndarray of shape (n_grid_points,)\n The average predictions for all points of `feature_values` for a\n given feature for all samples in `X`.\nfeature_values : ndarray of shape (n_grid_points,)\n The feature values for which the predictions have been computed.\nfeature_idx : int\n The index corresponding to the target feature.\nn_ice_lines : int\n The number of ICE lines to plot.\nax : Matplotlib axes\n The axis on which to plot the ICE and PDP lines.\nn_cols : int or None\n The number of column in the axis.\npd_plot_idx : int\n The sequential index of the plot. It will be unraveled to find the\n matching 2D position in the grid layout.\nn_lines : int\n The total number of lines expected to be plot on the axis.\nindividual_line_kw : dict\n Dict with keywords passed when plotting the ICE lines.\nline_kw : dict\n Dict with keywords passed when plotting the PD plot." - }, - { - "name": "_plot_two_way_partial_dependence", - "decorators": [], - "parameters": [ - { - "name": "avg_preds", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The average predictions for all points of `feature_values[0]` and `feature_values[1]` for some given features for all samples in `X`." - }, - { - "name": "feature_values", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A sequence of array of the feature values for which the predictions have been computed." 
- }, - { - "name": "feature_idx", - "type": "Tuple[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The indices of the target features" - }, - { - "name": "ax", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The axis on which to plot the ICE and PDP lines." - }, - { - "name": "pd_plot_idx", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sequential index of the plot. It will be unraveled to find the matching 2D position in the grid layout." - }, - { - "name": "Z_level", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The Z-level used to encode the average predictions." - }, - { - "name": "contour_kw", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed when plotting the contours." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Plot 2-way partial dependence.\n\nParameters\n----------\navg_preds : ndarray of shape (n_instances, n_grid_points, n_grid_points)\n The average predictions for all points of `feature_values[0]` and\n `feature_values[1]` for some given features for all samples in `X`.\nfeature_values : seq of 1d array\n A sequence of array of the feature values for which the predictions\n have been computed.\nfeature_idx : tuple of int\n The indices of the target features\nax : Matplotlib axes\n The axis on which to plot the ICE and PDP lines.\npd_plot_idx : int\n The sequential index of the plot. It will be unraveled to find the\n matching 2D position in the grid layout.\nZ_level : ndarray of shape (8, 8)\n The Z-level used to encode the average predictions.\ncontour_kw : dict\n Dict with keywords passed when plotting the contours." 
- }, - { - "name": "plot", - "decorators": [], - "parameters": [ - { - "name": "ax", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "- If a single axis is passed in, it is treated as a bounding axes and a grid of partial dependence plots will be drawn within these bounds. The `n_cols` parameter controls the number of columns in the grid. - If an array-like of axes are passed in, the partial dependence plots will be drawn directly into these axes. - If `None`, a figure and a bounding axes is created and treated as the single axes case." - }, - { - "name": "n_cols", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of columns in the grid plot. Only active when `ax` is a single axes or `None`." - }, - { - "name": "line_kw", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed to the `matplotlib.pyplot.plot` call. For one-way partial dependence plots." - }, - { - "name": "contour_kw", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed to the `matplotlib.pyplot.contourf` call for two-way partial dependence plots." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot partial dependence plots.\n\nParameters\n----------\nax : Matplotlib axes or array-like of Matplotlib axes, default=None\n - If a single axis is passed in, it is treated as a bounding axes\n and a grid of partial dependence plots will be drawn within\n these bounds. 
The `n_cols` parameter controls the number of\n columns in the grid.\n - If an array-like of axes are passed in, the partial dependence\n plots will be drawn directly into these axes.\n - If `None`, a figure and a bounding axes is created and treated\n as the single axes case.\n\nn_cols : int, default=3\n The maximum number of columns in the grid plot. Only active when\n `ax` is a single axes or `None`.\n\nline_kw : dict, default=None\n Dict with keywords passed to the `matplotlib.pyplot.plot` call.\n For one-way partial dependence plots.\n\ncontour_kw : dict, default=None\n Dict with keywords passed to the `matplotlib.pyplot.contourf`\n call for two-way partial dependence plots.\n\nReturns\n-------\ndisplay : :class:`~sklearn.inspection.PartialDependenceDisplay`" - } - ], - "docstring": "Partial Dependence Plot (PDP).\n\nThis can also display individual partial dependencies which are often\nreferred to as: Individual Condition Expectation (ICE).\n\nIt is recommended to use\n:func:`~sklearn.inspection.plot_partial_dependence` to create a\n:class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are\nstored as attributes.\n\nRead more in\n:ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`\nand the :ref:`User Guide `.\n\n .. versionadded:: 0.22\n\nParameters\n----------\npd_results : list of Bunch\n Results of :func:`~sklearn.inspection.partial_dependence` for\n ``features``.\n\nfeatures : list of (int,) or list of (int, int)\n Indices of features for a given plot. A tuple of one integer will plot\n a partial dependence curve of one feature. A tuple of two integers will\n plot a two-way partial dependence curve as a contour plot.\n\nfeature_names : list of str\n Feature names corresponding to the indices in ``features``.\n\ntarget_idx : int\n\n - In a multiclass setting, specifies the class for which the PDPs\n should be computed. 
Note that for binary classification, the\n positive class (index 1) is always used.\n - In a multioutput setting, specifies the task for which the PDPs\n should be computed.\n\n Ignored in binary classification or classical regression settings.\n\npdp_lim : dict\n Global min and max average predictions, such that all plots will have\n the same scale and y limits. `pdp_lim[1]` is the global min and max for\n single partial dependence curves. `pdp_lim[2]` is the global min and\n max for two-way partial dependence curves.\n\ndeciles : dict\n Deciles for feature indices in ``features``.\n\nkind : {'average', 'individual', 'both'}, default='average'\n Whether to plot the partial dependence averaged across all the samples\n in the dataset or one line per sample or both.\n\n - ``kind='average'`` results in the traditional PD plot;\n - ``kind='individual'`` results in the ICE plot.\n\n Note that the fast ``method='recursion'`` option is only available for\n ``kind='average'``. Plotting individual dependencies requires using the\n slower ``method='brute'`` option.\n\n .. versionadded:: 0.24\n\nsubsample : float, int or None, default=1000\n Sampling for ICE curves when `kind` is 'individual' or 'both'.\n If float, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to be used to plot ICE curves. If int, represents the\n maximum absolute number of samples to use.\n\n Note that the full dataset is still used to calculate partial\n dependence when `kind='both'`.\n\n .. versionadded:: 0.24\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the selected samples when subsamples is not\n `None`. See :term:`Glossary ` for details.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nbounding_ax_ : matplotlib Axes or None\n If `ax` is an axes or None, the `bounding_ax_` is the axes where the\n grid of partial dependence plots are drawn. 
If `ax` is a list of axes\n or a numpy array of axes, `bounding_ax_` is None.\n\naxes_ : ndarray of matplotlib Axes\n If `ax` is an axes or None, `axes_[i, j]` is the axes on the i-th row\n and j-th column. If `ax` is a list of axes, `axes_[i]` is the i-th item\n in `ax`. Elements that are None correspond to a nonexisting axes in\n that position.\n\nlines_ : ndarray of matplotlib Artists\n If `ax` is an axes or None, `lines_[i, j]` is the partial dependence\n curve on the i-th row and j-th column. If `ax` is a list of axes,\n `lines_[i]` is the partial dependence curve corresponding to the i-th\n item in `ax`. Elements that are None correspond to a nonexisting axes\n or an axes that does not include a line plot.\n\ndeciles_vlines_ : ndarray of matplotlib LineCollection\n If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n representing the x axis deciles of the i-th row and j-th column. If\n `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n `ax`. Elements that are None correspond to a nonexisting axes or an\n axes that does not include a PDP plot.\n\n .. versionadded:: 0.23\n\ndeciles_hlines_ : ndarray of matplotlib LineCollection\n If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n representing the y axis deciles of the i-th row and j-th column. If\n `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n `ax`. Elements that are None correspond to a nonexisting axes or an\n axes that does not include a 2-way plot.\n\n .. versionadded:: 0.23\n\ncontours_ : ndarray of matplotlib Artists\n If `ax` is an axes or None, `contours_[i, j]` is the partial dependence\n plot on the i-th row and j-th column. If `ax` is a list of axes,\n `contours_[i]` is the partial dependence plot corresponding to the i-th\n item in `ax`. 
Elements that are None correspond to a nonexisting axes\n or an axes that does not include a contour plot.\n\nfigure_ : matplotlib Figure\n Figure containing partial dependence plots.\n\nSee Also\n--------\npartial_dependence : Compute Partial Dependence values.\nplot_partial_dependence : Plot Partial Dependence." - } - ], - "functions": [ - { - "name": "plot_partial_dependence", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A fitted estimator object implementing :term:`predict`, :term:`predict_proba`, or :term:`decision_function`. Multioutput-multiclass classifiers are not supported." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "``X`` is used to generate a grid of values for the target ``features`` (where the partial dependence will be evaluated), and also to generate values for the complement features when the `method` is `'brute'`." - }, - { - "name": "features", - "type": "Union[str, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target features for which to create the PDPs. If `features[i]` is an integer or a string, a one-way PDP is created; if `features[i]` is a tuple, a two-way PDP is created (only supported with `kind='average'`). Each tuple must be of size 2. if any entry is a string, then it must be in ``feature_names``." - }, - { - "name": "feature_names", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of each feature; `feature_names[i]` holds the name of the feature with index `i`. By default, the name of the feature corresponds to their numerical index for NumPy array and their column name for pandas dataframe." 
- }, - { - "name": "target", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "- In a multiclass setting, specifies the class for which the PDPs should be computed. Note that for binary classification, the positive class (index 1) is always used. - In a multioutput setting, specifies the task for which the PDPs should be computed. Ignored in binary classification or classical regression settings." - }, - { - "name": "response_method", - "type": "Literal['auto', 'predict_proba', 'decision_function']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. For regressors this parameter is ignored and the response is always the output of :term:`predict`. By default, :term:`predict_proba` is tried first and we revert to :term:`decision_function` if it doesn't exist. If ``method`` is `'recursion'`, the response is always the output of :term:`decision_function`." - }, - { - "name": "n_cols", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of columns in the grid plot. Only active when `ax` is a single axis or `None`." - }, - { - "name": "grid_resolution", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of equally spaced points on the axes of the plots, for each target feature." - }, - { - "name": "percentiles", - "type": "Tuple[float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper percentile used to create the extreme values for the PDP axes. Must be in [0, 1]." 
- }, - { - "name": "method", - "type": "str", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The method used to calculate the averaged predictions: - `'recursion'` is only supported for some tree-based estimators (namely :class:`~sklearn.ensemble.GradientBoostingClassifier`, :class:`~sklearn.ensemble.GradientBoostingRegressor`, :class:`~sklearn.ensemble.HistGradientBoostingClassifier`, :class:`~sklearn.ensemble.HistGradientBoostingRegressor`, :class:`~sklearn.tree.DecisionTreeRegressor`, :class:`~sklearn.ensemble.RandomForestRegressor` but is more efficient in terms of speed. With this method, the target response of a classifier is always the decision function, not the predicted probabilities. Since the `'recursion'` method implicitely computes the average of the ICEs by design, it is not compatible with ICE and thus `kind` must be `'average'`. - `'brute'` is supported for any estimator, but is more computationally intensive. - `'auto'`: the `'recursion'` is used for estimators that support it, and `'brute'` is used otherwise. Please see :ref:`this note ` for differences between the `'brute'` and `'recursion'` method." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of CPUs to use to compute the partial dependences. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbose output during PD computations." - }, - { - "name": "line_kw", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed to the ``matplotlib.pyplot.plot`` call. For one-way partial dependence plots." 
- }, - { - "name": "contour_kw", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call. For two-way partial dependence plots." - }, - { - "name": "ax", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "- If a single axis is passed in, it is treated as a bounding axes and a grid of partial dependence plots will be drawn within these bounds. The `n_cols` parameter controls the number of columns in the grid. - If an array-like of axes are passed in, the partial dependence plots will be drawn directly into these axes. - If `None`, a figure and a bounding axes is created and treated as the single axes case. .. versionadded:: 0.22" - }, - { - "name": "kind", - "type": "Literal['average', 'individual', 'both']", - "hasDefault": true, - "default": "'average'", - "limitation": null, - "ignored": false, - "docstring": " Whether to plot the partial dependence averaged across all the samples in the dataset or one line per sample or both. - ``kind='average'`` results in the traditional PD plot; - ``kind='individual'`` results in the ICE plot. Note that the fast ``method='recursion'`` option is only available for ``kind='average'``. Plotting individual dependencies requires using the slower ``method='brute'`` option. .. versionadded:: 0.24" - }, - { - "name": "subsample", - "type": "Optional[Union[int, float]]", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Sampling for ICE curves when `kind` is 'individual' or 'both'. If `float`, should be between 0.0 and 1.0 and represent the proportion of the dataset to be used to plot ICE curves. If `int`, represents the absolute number samples to use. Note that the full dataset is still used to calculate averaged partial dependence when `kind='both'`. .. 
versionadded:: 0.24" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of the selected samples when subsamples is not `None` and `kind` is either `'both'` or `'individual'`. See :term:`Glossary ` for details. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Partial dependence (PD) and individual conditional expectation (ICE)\nplots.\n\nPartial dependence plots, individual conditional expectation plots or an\noverlay of both of them can be plotted by setting the ``kind``\nparameter.\nThe ``len(features)`` plots are arranged in a grid with ``n_cols``\ncolumns. Two-way partial dependence plots are plotted as contour plots. The\ndeciles of the feature values will be shown with tick marks on the x-axes\nfor one-way plots, and on both axes for two-way plots.\n\nRead more in the :ref:`User Guide `.\n\n.. note::\n\n :func:`plot_partial_dependence` does not support using the same axes\n with multiple calls. To plot the the partial dependence for multiple\n estimators, please pass the axes created by the first call to the\n second call::\n\n >>> from sklearn.inspection import plot_partial_dependence\n >>> from sklearn.datasets import make_friedman1\n >>> from sklearn.linear_model import LinearRegression\n >>> from sklearn.ensemble import RandomForestRegressor\n >>> X, y = make_friedman1()\n >>> est1 = LinearRegression().fit(X, y)\n >>> est2 = RandomForestRegressor().fit(X, y)\n >>> disp1 = plot_partial_dependence(est1, X,\n ... [1, 2]) # doctest: +SKIP\n >>> disp2 = plot_partial_dependence(est2, X, [1, 2],\n ... ax=disp1.axes_) # doctest: +SKIP\n\n.. warning::\n\n For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n `'recursion'` method (used by default) will not account for the `init`\n predictor of the boosting process. 
In practice, this will produce\n the same values as `'brute'` up to a constant offset in the target\n response, provided that `init` is a constant estimator (which is the\n default). However, if `init` is not a constant estimator, the\n partial dependence values are incorrect for `'recursion'` because the\n offset will be sample-dependent. It is preferable to use the `'brute'`\n method. Note that this only applies to\n :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\nParameters\n----------\nestimator : BaseEstimator\n A fitted estimator object implementing :term:`predict`,\n :term:`predict_proba`, or :term:`decision_function`.\n Multioutput-multiclass classifiers are not supported.\n\nX : {array-like or dataframe} of shape (n_samples, n_features)\n ``X`` is used to generate a grid of values for the target\n ``features`` (where the partial dependence will be evaluated), and\n also to generate values for the complement features when the\n `method` is `'brute'`.\n\nfeatures : list of {int, str, pair of int, pair of str}\n The target features for which to create the PDPs.\n If `features[i]` is an integer or a string, a one-way PDP is created;\n if `features[i]` is a tuple, a two-way PDP is created (only supported\n with `kind='average'`). Each tuple must be of size 2.\n if any entry is a string, then it must be in ``feature_names``.\n\nfeature_names : array-like of shape (n_features,), dtype=str, default=None\n Name of each feature; `feature_names[i]` holds the name of the feature\n with index `i`.\n By default, the name of the feature corresponds to their numerical\n index for NumPy array and their column name for pandas dataframe.\n\ntarget : int, default=None\n - In a multiclass setting, specifies the class for which the PDPs\n should be computed. 
Note that for binary classification, the\n positive class (index 1) is always used.\n - In a multioutput setting, specifies the task for which the PDPs\n should be computed.\n\n Ignored in binary classification or classical regression settings.\n\nresponse_method : {'auto', 'predict_proba', 'decision_function'}, default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. For regressors\n this parameter is ignored and the response is always the output of\n :term:`predict`. By default, :term:`predict_proba` is tried first\n and we revert to :term:`decision_function` if it doesn't exist. If\n ``method`` is `'recursion'`, the response is always the output of\n :term:`decision_function`.\n\nn_cols : int, default=3\n The maximum number of columns in the grid plot. Only active when `ax`\n is a single axis or `None`.\n\ngrid_resolution : int, default=100\n The number of equally spaced points on the axes of the plots, for each\n target feature.\n\npercentiles : tuple of float, default=(0.05, 0.95)\n The lower and upper percentile used to create the extreme values\n for the PDP axes. Must be in [0, 1].\n\nmethod : str, default='auto'\n The method used to calculate the averaged predictions:\n\n - `'recursion'` is only supported for some tree-based estimators\n (namely\n :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n :class:`~sklearn.tree.DecisionTreeRegressor`,\n :class:`~sklearn.ensemble.RandomForestRegressor`\n but is more efficient in terms of speed.\n With this method, the target response of a\n classifier is always the decision function, not the predicted\n probabilities. 
Since the `'recursion'` method implicitely computes\n the average of the ICEs by design, it is not compatible with ICE and\n thus `kind` must be `'average'`.\n\n - `'brute'` is supported for any estimator, but is more\n computationally intensive.\n\n - `'auto'`: the `'recursion'` is used for estimators that support it,\n and `'brute'` is used otherwise.\n\n Please see :ref:`this note ` for\n differences between the `'brute'` and `'recursion'` method.\n\nn_jobs : int, default=None\n The number of CPUs to use to compute the partial dependences.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n Verbose output during PD computations.\n\nline_kw : dict, default=None\n Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\n For one-way partial dependence plots.\n\ncontour_kw : dict, default=None\n Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\n For two-way partial dependence plots.\n\nax : Matplotlib axes or array-like of Matplotlib axes, default=None\n - If a single axis is passed in, it is treated as a bounding axes\n and a grid of partial dependence plots will be drawn within\n these bounds. The `n_cols` parameter controls the number of\n columns in the grid.\n - If an array-like of axes are passed in, the partial dependence\n plots will be drawn directly into these axes.\n - If `None`, a figure and a bounding axes is created and treated\n as the single axes case.\n\n .. versionadded:: 0.22\n\nkind : {'average', 'individual', 'both'}, default='average'\n Whether to plot the partial dependence averaged across all the samples\n in the dataset or one line per sample or both.\n\n - ``kind='average'`` results in the traditional PD plot;\n - ``kind='individual'`` results in the ICE plot.\n\n Note that the fast ``method='recursion'`` option is only available for\n ``kind='average'``. 
Plotting individual dependencies requires using the\n slower ``method='brute'`` option.\n\n .. versionadded:: 0.24\n\nsubsample : float, int or None, default=1000\n Sampling for ICE curves when `kind` is 'individual' or 'both'.\n If `float`, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to be used to plot ICE curves. If `int`, represents the\n absolute number samples to use.\n\n Note that the full dataset is still used to calculate averaged partial\n dependence when `kind='both'`.\n\n .. versionadded:: 0.24\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the selected samples when subsamples is not\n `None` and `kind` is either `'both'` or `'individual'`.\n See :term:`Glossary ` for details.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ndisplay : :class:`~sklearn.inspection.PartialDependenceDisplay`\n\nSee Also\n--------\npartial_dependence : Compute Partial Dependence values.\nPartialDependenceDisplay : Partial Dependence visualization.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.ensemble import GradientBoostingRegressor\n>>> X, y = make_friedman1()\n>>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n>>> plot_partial_dependence(clf, X, [0, (0, 1)]) #doctest: +SKIP" - } - ] - }, - { - "name": "sklearn.inspection._plot", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.inspection._plot.tests.test_plot_partial_dependence", - "imports": [ - "import numpy as np", - "from scipy.stats.mstats import mquantiles", - "import pytest", - "from numpy.testing import assert_allclose", - "from sklearn.datasets import load_diabetes", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.ensemble import GradientBoostingRegressor", - "from sklearn.ensemble import GradientBoostingClassifier", - "from 
sklearn.linear_model import LinearRegression", - "from sklearn.utils._testing import _convert_container", - "from sklearn.inspection import plot_partial_dependence" - ], - "classes": [], - "functions": [ - { - "name": "diabetes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "clf_diabetes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_kind", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_str_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_custom_axes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_passing_numpy_axes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_incorrent_num_axes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_with_same_axes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_feature_name_reuse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_multiclass_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_does_not_override_ylabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_subsampling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_overwrite_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that make sure that we can overwrite the label of the PDP plot" - } - ] - }, - { - "name": "sklearn.inspection._plot.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.linear_model.setup", - "imports": [ - "import os", - "import numpy", - "from sklearn._build_utils import gen_from_templates", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model._base", - "imports": [ - "from abc import ABCMeta", - "from abc import 
abstractmethod", - "import numbers", - "import warnings", - "import numpy as np", - "import scipy.sparse as sp", - "from scipy import linalg", - "from scipy import optimize", - "from scipy import sparse", - "from scipy.special import expit", - "from joblib import Parallel", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import MultiOutputMixin", - "from utils import check_array", - "from utils.validation import FLOAT_DTYPES", - "from utils.validation import _deprecate_positional_args", - "from utils import check_random_state", - "from utils.extmath import safe_sparse_dot", - "from utils.sparsefuncs import mean_variance_axis", - "from utils.sparsefuncs import inplace_column_scale", - "from utils.fixes import sparse_lsqr", - "from utils._seq_dataset import ArrayDataset32", - "from utils._seq_dataset import CSRDataset32", - "from utils._seq_dataset import ArrayDataset64", - "from utils._seq_dataset import CSRDataset64", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.fixes import delayed", - "from preprocessing import normalize as f_normalize" - ], - "classes": [ - { - "name": "LinearModel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit model." - }, - { - "name": "_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the linear model.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\nC : array, shape (n_samples,)\n Returns predicted values." - }, - { - "name": "_set_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set the intercept_\n " - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for Linear Models" - }, - { - "name": "LinearClassifierMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict confidence scores for samples.\n\nThe confidence score for a sample is proportional to the signed\ndistance of that sample to the hyperplane.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\narray, shape=(n_samples,) if n_classes == 2 else (n_samples, n_classes)\n Confidence scores per (sample, class) combination. In the binary\n case, confidence score for self.classes_[1] where >0 means this\n class would be predicted." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class labels for samples in X.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\nC : array, shape [n_samples]\n Predicted class label per sample." - }, - { - "name": "_predict_proba_lr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Probability estimation for OvR logistic regression.\n\nPositive class probabilities are computed as\n1. / (1. + np.exp(-self.decision_function(X)));\nmulticlass is handled by normalizing that over all classes." - } - ], - "docstring": "Mixin for linear classifiers.\n\nHandles prediction for sparse and dense X." - }, - { - "name": "SparseCoefMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "densify", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Convert coefficient matrix to dense array format.\n\nConverts the ``coef_`` member (back) to a numpy.ndarray. This is the\ndefault format of ``coef_`` and is required for fitting, so calling\nthis method is only required on models that have previously been\nsparsified; otherwise, it is a no-op.\n\nReturns\n-------\nself\n Fitted estimator." - }, - { - "name": "sparsify", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Convert coefficient matrix to sparse format.\n\nConverts the ``coef_`` member to a scipy.sparse matrix, which for\nL1-regularized models can be much more memory- and storage-efficient\nthan the usual numpy.ndarray representation.\n\nThe ``intercept_`` member is not converted.\n\nReturns\n-------\nself\n Fitted estimator.\n\nNotes\n-----\nFor non-sparse models, i.e. 
when there are not many zeros in ``coef_``,\nthis may actually *increase* memory usage, so use this method with\ncare. A rule of thumb is that the number of zero elements, which can\nbe computed with ``(coef_ == 0).sum()``, must be more than 50% for this\nto provide significant benefits.\n\nAfter calling this method, further fitting with the partial_fit\nmethod (if any) will not work until you call densify." - } - ], - "docstring": "Mixin for converting coef_ to and from CSR format.\n\nL1-regularizing estimators should inherit this." - }, - { - "name": "LinearRegression", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to False, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This will only provide speedup for n_targets > 1 and sufficient large problems. 
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, forces the coefficients to be positive. This option is only supported for dense arrays. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. Will be cast to X's dtype if necessary" - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample .. versionadded:: 0.17 parameter *sample_weight* support to LinearRegression." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit linear model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to X's dtype if necessary\n\nsample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample\n\n .. versionadded:: 0.17\n parameter *sample_weight* support to LinearRegression.\n\nReturns\n-------\nself : returns an instance of self." 
- } - ], - "docstring": "Ordinary least squares Linear Regression.\n\nLinearRegression fits a linear model with coefficients w = (w1, ..., wp)\nto minimize the residual sum of squares between the observed targets in\nthe dataset, and the targets predicted by the linear approximation.\n\nParameters\n----------\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to False, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This will only provide\n speedup for n_targets > 1 and sufficient large problems.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\npositive : bool, default=False\n When set to ``True``, forces the coefficients to be positive. This\n option is only supported for dense arrays.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncoef_ : array of shape (n_features, ) or (n_targets, n_features)\n Estimated coefficients for the linear regression problem.\n If multiple targets are passed during the fit (y 2D), this\n is a 2D array of shape (n_targets, n_features), while if only\n one target is passed, this is a 1D array of length n_features.\n\nrank_ : int\n Rank of matrix `X`. Only available when `X` is dense.\n\nsingular_ : array of shape (min(X, y),)\n Singular values of `X`. 
Only available when `X` is dense.\n\nintercept_ : float or array of shape (n_targets,)\n Independent term in the linear model. Set to 0.0 if\n `fit_intercept = False`.\n\nSee Also\n--------\nRidge : Ridge regression addresses some of the\n problems of Ordinary Least Squares by imposing a penalty on the\n size of the coefficients with l2 regularization.\nLasso : The Lasso is a linear model that estimates\n sparse coefficients with l1 regularization.\nElasticNet : Elastic-Net is a linear regression\n model trained with both l1 and l2 -norm regularization of the\n coefficients.\n\nNotes\n-----\nFrom the implementation point of view, this is just plain Ordinary\nLeast Squares (scipy.linalg.lstsq) or Non Negative Least Squares\n(scipy.optimize.nnls) wrapped as a predictor object.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LinearRegression\n>>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])\n>>> # y = 1 * x_0 + 2 * x_1 + 3\n>>> y = np.dot(X, np.array([1, 2])) + 3\n>>> reg = LinearRegression().fit(X, y)\n>>> reg.score(X, y)\n1.0\n>>> reg.coef_\narray([1., 2.])\n>>> reg.intercept_\n3.0000...\n>>> reg.predict(np.array([[3, 5]]))\narray([16.])" - } - ], - "functions": [ - { - "name": "make_dataset", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." 
- }, - { - "name": "sample_weight", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weight of each sample" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling and noise. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Create ``Dataset`` abstraction for sparse and dense inputs.\n\nThis also returns the ``intercept_decay`` which is different\nfor sparse datasets.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Training data\n\ny : array-like, shape (n_samples, )\n Target values.\n\nsample_weight : numpy array of shape (n_samples,)\n The weight of each sample\n\nrandom_state : int, RandomState instance or None (default)\n Determines random number generation for dataset shuffling and noise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ndataset\n The ``Dataset`` abstraction\nintercept_decay\n The intercept decay" - }, - { - "name": "_preprocess_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Center and scale data.\n\nCenters data to have mean zero along axis 0. If fit_intercept=False or if\nthe X is a sparse matrix, no centering is done, but normalization can still\nbe applied. The function returns the statistics necessary to reconstruct\nthe input data, which are X_offset, y_offset, X_scale, such that the output\n\n X = (X - X_offset) / X_scale\n\nX_scale is the L2 norm of X - X_offset. If sample_weight is not None,\nthen the weighted mean of X and y is zero, and not the mean itself. 
If\nreturn_mean=True, the mean, eventually weighted, is returned, independently\nof whether X was centered (option used for optimization with sparse data in\ncoordinate_descend).\n\nThis is here because nearly all linear models will want their data to be\ncentered. This function also systematically makes y consistent with X.dtype" - }, - { - "name": "_rescale_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Rescale data sample-wise by square root of sample_weight.\n\nFor many linear models, this enables easy support for sample_weight.\n\nReturns\n-------\nX_rescaled : {array-like, sparse matrix}\n\ny_rescaled : {array-like, sparse matrix}" - }, - { - "name": "_pre_fit", - "decorators": [], - "parameters": [ - { - "name": "order", - "type": "Optional[Literal['F', 'C']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether X and y will be forced to be fortran or c-style. Only relevant if sample_weight is not None." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Aux function used at beginning of fit in linear models\n\nParameters\n----------\norder : 'F', 'C' or None, default=None\n Whether X and y will be forced to be fortran or c-style. Only relevant\n if sample_weight is not None." 
- } - ] - }, - { - "name": "sklearn.linear_model._bayes", - "imports": [ - "from math import log", - "import numpy as np", - "from scipy import linalg", - "from _base import LinearModel", - "from _base import _rescale_data", - "from base import RegressorMixin", - "from utils.extmath import fast_logdet", - "from scipy.linalg import pinvh", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "BayesianRidge", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations. Should be greater than or equal to 1." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Stop the algorithm if w has converged." - }, - { - "name": "alpha_1", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Hyper-parameter : shape parameter for the Gamma distribution prior over the alpha parameter." - }, - { - "name": "alpha_2", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the alpha parameter." - }, - { - "name": "lambda_1", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Hyper-parameter : shape parameter for the Gamma distribution prior over the lambda parameter." 
- }, - { - "name": "lambda_2", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the lambda parameter." - }, - { - "name": "alpha_init", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial value for alpha (precision of the noise). If not set, alpha_init is 1/Var(y). .. versionadded:: 0.22" - }, - { - "name": "lambda_init", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial value for lambda (precision of the weights). If not set, lambda_init is 1. .. versionadded:: 0.22" - }, - { - "name": "compute_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, compute the log marginal likelihood at each iteration of the optimization." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. The intercept is not treated as a probabilistic parameter and thus has no associated variance. If set to False, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." 
- }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbose mode when fitting the model." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. Will be cast to X's dtype if necessary" - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample .. versionadded:: 0.20 parameter *sample_weight* support to BayesianRidge." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Training data\ny : ndarray of shape (n_samples,)\n Target values. Will be cast to X's dtype if necessary\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Individual weights for each sample\n\n .. versionadded:: 0.20\n parameter *sample_weight* support to BayesianRidge.\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." 
- }, - { - "name": "return_std", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the standard deviation of posterior prediction." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the linear model.\n\nIn addition to the mean of the predictive distribution, also its\nstandard deviation can be returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\nreturn_std : bool, default=False\n Whether to return the standard deviation of posterior prediction.\n\nReturns\n-------\ny_mean : array-like of shape (n_samples,)\n Mean of predictive distribution of query points.\n\ny_std : array-like of shape (n_samples,)\n Standard deviation of predictive distribution of query points." - }, - { - "name": "_update_coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update posterior mean and compute corresponding rmse.\n\nPosterior mean is given by coef_ = scaled_sigma_ * X.T * y where\nscaled_sigma_ = (lambda_/alpha_ * np.eye(n_features)\n + np.dot(X.T, X))^-1" - }, - { - "name": "_log_marginal_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Log marginal likelihood." - } - ], - "docstring": "Bayesian ridge regression.\n\nFit a Bayesian ridge model. See the Notes section for details on this\nimplementation and the optimization of the regularization parameters\nlambda (precision of the weights) and alpha (precision of the noise).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_iter : int, default=300\n Maximum number of iterations. 
Should be greater than or equal to 1.\n\ntol : float, default=1e-3\n Stop the algorithm if w has converged.\n\nalpha_1 : float, default=1e-6\n Hyper-parameter : shape parameter for the Gamma distribution prior\n over the alpha parameter.\n\nalpha_2 : float, default=1e-6\n Hyper-parameter : inverse scale parameter (rate parameter) for the\n Gamma distribution prior over the alpha parameter.\n\nlambda_1 : float, default=1e-6\n Hyper-parameter : shape parameter for the Gamma distribution prior\n over the lambda parameter.\n\nlambda_2 : float, default=1e-6\n Hyper-parameter : inverse scale parameter (rate parameter) for the\n Gamma distribution prior over the lambda parameter.\n\nalpha_init : float, default=None\n Initial value for alpha (precision of the noise).\n If not set, alpha_init is 1/Var(y).\n\n .. versionadded:: 0.22\n\nlambda_init : float, default=None\n Initial value for lambda (precision of the weights).\n If not set, lambda_init is 1.\n\n .. versionadded:: 0.22\n\ncompute_score : bool, default=False\n If True, compute the log marginal likelihood at each iteration of the\n optimization.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model.\n The intercept is not treated as a probabilistic parameter\n and thus has no associated variance. If set\n to False, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nverbose : bool, default=False\n Verbose mode when fitting the model.\n\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n Coefficients of the regression model (mean of distribution)\n\nintercept_ : float\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\nalpha_ : float\n Estimated precision of the noise.\n\nlambda_ : float\n Estimated precision of the weights.\n\nsigma_ : array-like of shape (n_features, n_features)\n Estimated variance-covariance matrix of the weights\n\nscores_ : array-like of shape (n_iter_+1,)\n If computed_score is True, value of the log marginal likelihood (to be\n maximized) at each iteration of the optimization. The array starts\n with the value of the log marginal likelihood obtained for the initial\n values of alpha and lambda and ends with the value obtained for the\n estimated alpha and lambda.\n\nn_iter_ : int\n The actual number of iterations to reach the stopping criterion.\n\nX_offset_ : float\n If `normalize=True`, offset subtracted for centering data to a\n zero mean.\n\nX_scale_ : float\n If `normalize=True`, parameter used to scale data to a unit\n standard deviation.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.BayesianRidge()\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\nBayesianRidge()\n>>> clf.predict([[1, 1]])\narray([1.])\n\nNotes\n-----\nThere exist several strategies to perform Bayesian ridge regression. 
This\nimplementation is based on the algorithm described in Appendix A of\n(Tipping, 2001) where updates of the regularization parameters are done as\nsuggested in (MacKay, 1992). Note that according to A New\nView of Automatic Relevance Determination (Wipf and Nagarajan, 2008) these\nupdate rules do not guarantee that the marginal likelihood is increasing\nbetween two consecutive iterations of the optimization.\n\nReferences\n----------\nD. J. C. MacKay, Bayesian Interpolation, Computation and Neural Systems,\nVol. 4, No. 3, 1992.\n\nM. E. Tipping, Sparse Bayesian Learning and the Relevance Vector Machine,\nJournal of Machine Learning Research, Vol. 1, 2001." - }, - { - "name": "ARDRegression", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Stop the algorithm if w has converged." - }, - { - "name": "alpha_1", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Hyper-parameter : shape parameter for the Gamma distribution prior over the alpha parameter." - }, - { - "name": "alpha_2", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the alpha parameter." - }, - { - "name": "lambda_1", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Hyper-parameter : shape parameter for the Gamma distribution prior over the lambda parameter." 
- }, - { - "name": "lambda_2", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the lambda parameter." - }, - { - "name": "compute_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, compute the objective function at each step of the model." - }, - { - "name": "threshold_lambda", - "type": "float", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "threshold for removing (pruning) weights with high precision from the computation." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbose mode when fitting the model." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values (integers). Will be cast to X's dtype if necessary" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the ARDRegression model according to the given training data\nand parameters.\n\nIterative procedure to maximize the evidence\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\ny : array-like of shape (n_samples,)\n Target values (integers). Will be cast to X's dtype if necessary\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "_update_sigma_woodbury", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_update_sigma", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." - }, - { - "name": "return_std", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the standard deviation of posterior prediction." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the linear model.\n\nIn addition to the mean of the predictive distribution, also its\nstandard deviation can be returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\nreturn_std : bool, default=False\n Whether to return the standard deviation of posterior prediction.\n\nReturns\n-------\ny_mean : array-like of shape (n_samples,)\n Mean of predictive distribution of query points.\n\ny_std : array-like of shape (n_samples,)\n Standard deviation of predictive distribution of query points." - } - ], - "docstring": "Bayesian ARD regression.\n\nFit the weights of a regression model, using an ARD prior. The weights of\nthe regression model are assumed to be in Gaussian distributions.\nAlso estimate the parameters lambda (precisions of the distributions of the\nweights) and alpha (precision of the distribution of the noise).\nThe estimation is done by an iterative procedures (Evidence Maximization)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_iter : int, default=300\n Maximum number of iterations.\n\ntol : float, default=1e-3\n Stop the algorithm if w has converged.\n\nalpha_1 : float, default=1e-6\n Hyper-parameter : shape parameter for the Gamma distribution prior\n over the alpha parameter.\n\nalpha_2 : float, default=1e-6\n Hyper-parameter : inverse scale parameter (rate parameter) for the\n Gamma distribution prior over the alpha parameter.\n\nlambda_1 : float, default=1e-6\n Hyper-parameter : shape parameter for the Gamma distribution prior\n over the lambda parameter.\n\nlambda_2 : float, default=1e-6\n Hyper-parameter : inverse scale parameter (rate parameter) for the\n Gamma distribution prior over the lambda parameter.\n\ncompute_score : bool, default=False\n If True, compute the objective function at each step of the model.\n\nthreshold_lambda : float, default=10 000\n threshold for removing 
(pruning) weights with high precision from\n the computation.\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nverbose : bool, default=False\n Verbose mode when fitting the model.\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n Coefficients of the regression model (mean of distribution)\n\nalpha_ : float\n estimated precision of the noise.\n\nlambda_ : array-like of shape (n_features,)\n estimated precisions of the weights.\n\nsigma_ : array-like of shape (n_features, n_features)\n estimated variance-covariance matrix of the weights\n\nscores_ : float\n if computed, value of the objective function (to be maximized)\n\nintercept_ : float\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\nX_offset_ : float\n If `normalize=True`, offset subtracted for centering data to a\n zero mean.\n\nX_scale_ : float\n If `normalize=True`, parameter used to scale data to a unit\n standard deviation.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.ARDRegression()\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\nARDRegression()\n>>> clf.predict([[1, 1]])\narray([1.])\n\nNotes\n-----\nFor an example, see :ref:`examples/linear_model/plot_ard.py\n`.\n\nReferences\n----------\nD. J. C. MacKay, Bayesian nonlinear modeling for the prediction\ncompetition, ASHRAE Transactions, 1994.\n\nR. 
Salakhutdinov, Lecture notes on Statistical Machine Learning,\nhttp://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture2.pdf#page=15\nTheir beta is our ``self.alpha_``\nTheir alpha is our ``self.lambda_``\nARD is a little different than the slide: only dimensions/features for\nwhich ``self.lambda_ < self.threshold_lambda`` are kept and the rest are\ndiscarded." - } - ], - "functions": [] - }, - { - "name": "sklearn.linear_model._coordinate_descent", - "imports": [ - "import sys", - "import warnings", - "import numbers", - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numpy as np", - "from scipy import sparse", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "from _base import LinearModel", - "from _base import _pre_fit", - "from base import RegressorMixin", - "from base import MultiOutputMixin", - "from _base import _preprocess_data", - "from utils import check_array", - "from utils.validation import check_random_state", - "from model_selection import check_cv", - "from utils.extmath import safe_sparse_dot", - "from utils.fixes import _astype_copy_false", - "from utils.fixes import _joblib_parallel_args", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.validation import column_or_1d", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from None import _cd_fast as cd_fast" - ], - "classes": [ - { - "name": "ElasticNet", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the penalty terms. Defaults to 1.0. See the notes for the exact mathematical meaning of this parameter. 
``alpha = 0`` is equivalent to an ordinary least square, solved by the :class:`LinearRegression` object. For numerical reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised. Given this, you should use the :class:`LinearRegression` object." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The ElasticNet mixing parameter, with ``0 <= l1_ratio <= 1``. For ``l1_ratio = 0`` the penalty is an L2 penalty. ``For l1_ratio = 1`` it is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the intercept should be estimated or not. If ``False``, the data is assumed to be already centered." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. The Gram matrix can also be passed as argument. For sparse input this option is always ``True`` to preserve sparsity." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." 
- }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, forces the coefficients to be positive." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator that selects a random feature to update. Used when ``selection`` == 'random'. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "selection", - "type": "Literal['cyclic', 'random']", - "hasDefault": true, - "default": "'cyclic'", - "limitation": null, - "ignored": false, - "docstring": "If set to 'random', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target. Will be cast to X's dtype if necessary." - }, - { - "name": "sample_weight", - "type": "Union[ArrayLike, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weight. .. versionadded:: 0.23" - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Allow to bypass several input checking. Don't use this parameter unless you know what you do." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit model with coordinate descent.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of (n_samples, n_features)\n Data.\n\ny : {ndarray, sparse matrix} of shape (n_samples,) or (n_samples, n_targets)\n Target. Will be cast to X's dtype if necessary.\n\nsample_weight : float or array-like of shape (n_samples,), default=None\n Sample weight.\n\n .. versionadded:: 0.23\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nNotes\n-----\n\nCoordinate descent is an algorithm that considers each column of\ndata at a time hence it will automatically convert the X input\nas a Fortran-contiguous numpy array if necessary.\n\nTo avoid memory re-allocation it is advised to allocate the\ninitial data in memory directly using that format." 
- }, - { - "name": "sparse_coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sparse representation of the fitted `coef_`." - }, - { - "name": "_decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decision function of the linear model.\n\nParameters\n----------\nX : numpy array or scipy.sparse matrix of shape (n_samples, n_features)\n\nReturns\n-------\nT : ndarray of shape (n_samples,)\n The predicted decision function." - } - ], - "docstring": "Linear regression with combined L1 and L2 priors as regularizer.\n\nMinimizes the objective function::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\n a * L1 + b * L2\n\nwhere::\n\n alpha = a + b and l1_ratio = a / (a + b)\n\nThe parameter l1_ratio corresponds to alpha in the glmnet R package while\nalpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio\n= 1 is the lasso penalty. Currently, l1_ratio <= 0.01 is not reliable,\nunless you supply your own sequence of alpha.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the penalty terms. Defaults to 1.0.\n See the notes for the exact mathematical meaning of this\n parameter. ``alpha = 0`` is equivalent to an ordinary least square,\n solved by the :class:`LinearRegression` object. 
For numerical\n reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.\n Given this, you should use the :class:`LinearRegression` object.\n\nl1_ratio : float, default=0.5\n The ElasticNet mixing parameter, with ``0 <= l1_ratio <= 1``. For\n ``l1_ratio = 0`` the penalty is an L2 penalty. ``For l1_ratio = 1`` it\n is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a\n combination of L1 and L2.\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If ``False``, the\n data is assumed to be already centered.\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool or array-like of shape (n_features, n_features), default=False\n Whether to use a precomputed Gram matrix to speed up\n calculations. 
The Gram matrix can also be passed as argument.\n For sparse input this option is always ``True`` to preserve sparsity.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\npositive : bool, default=False\n When set to ``True``, forces the coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. 
This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\nsparse_coef_ : sparse matrix of shape (n_features,) or (n_tasks, n_features)\n Sparse representation of the `coef_`.\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : list of int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n Given param alpha, the dual gaps at the end of the optimization,\n same shape as each observation of y.\n\nExamples\n--------\n>>> from sklearn.linear_model import ElasticNet\n>>> from sklearn.datasets import make_regression\n\n>>> X, y = make_regression(n_features=2, random_state=0)\n>>> regr = ElasticNet(random_state=0)\n>>> regr.fit(X, y)\nElasticNet(random_state=0)\n>>> print(regr.coef_)\n[18.83816048 64.55968825]\n>>> print(regr.intercept_)\n1.451...\n>>> print(regr.predict([[0, 0]]))\n[1.451...]\n\n\nNotes\n-----\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nSee Also\n--------\nElasticNetCV : Elastic net model with best model selection by\n cross-validation.\nSGDRegressor : Implements elastic net regression with incremental training.\nSGDClassifier : Implements logistic regression with elastic net penalty\n (``SGDClassifier(loss=\"log\", penalty=\"elasticnet\")``)." - }, - { - "name": "Lasso", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the L1 term. Defaults to 1.0. 
``alpha = 0`` is equivalent to an ordinary least square, solved by the :class:`LinearRegression` object. For numerical reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised. Given this, you should use the :class:`LinearRegression` object." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to False, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument. For sparse input this option is always ``True`` to preserve sparsity." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." 
- }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, forces the coefficients to be positive." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator that selects a random feature to update. Used when ``selection`` == 'random'. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "selection", - "type": "Literal['cyclic', 'random']", - "hasDefault": true, - "default": "'cyclic'", - "limitation": null, - "ignored": false, - "docstring": "If set to 'random', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Linear Model trained with L1 prior as regularizer (aka the Lasso)\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nTechnically the Lasso model is optimizing the same objective function as\nthe Elastic Net with ``l1_ratio=1.0`` (no L2 penalty).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the L1 term. Defaults to 1.0.\n ``alpha = 0`` is equivalent to an ordinary least square, solved\n by the :class:`LinearRegression` object. For numerical\n reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.\n Given this, you should use the :class:`LinearRegression` object.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to False, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : 'auto', bool or array-like of shape (n_features, n_features), default=False\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument. 
For sparse input\n this option is always ``True`` to preserve sparsity.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\npositive : bool, default=False\n When set to ``True``, forces the coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. 
This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n Given param alpha, the dual gaps at the end of the optimization,\n same shape as each observation of y.\n\nsparse_coef_ : sparse matrix of shape (n_features, 1) or (n_targets, n_features)\n Readonly property derived from ``coef_``.\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : int or list of int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.Lasso(alpha=0.1)\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\nLasso(alpha=0.1)\n>>> print(clf.coef_)\n[0.85 0. ]\n>>> print(clf.intercept_)\n0.15...\n\nSee Also\n--------\nlars_path\nlasso_path\nLassoLars\nLassoCV\nLassoLarsCV\nsklearn.decomposition.sparse_encode\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array." - }, - { - "name": "LinearModelCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Model to be fitted after the best alpha has been determined." - }, - { - "name": "_is_multitask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Bool indicating if class is meant for multidimensional target." 
- }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. Pass directly as Fortran-contiguous data to avoid unnecessary memory duplication. If y is mono-output, X can be sparse." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit linear model with coordinate descent.\n\nFit is on grid of alphas and best alpha estimated by cross-validation.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data\n to avoid unnecessary memory duplication. If y is mono-output,\n X can be sparse.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values." - } - ], - "docstring": "Base class for iterative model fitting along a regularization path." - }, - { - "name": "LassoCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Length of the path. ``eps=1e-3`` means that ``alpha_min / alpha_max = 1e-3``." - }, - { - "name": "n_alphas", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of alphas along the regularization path." - }, - { - "name": "alphas", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of alphas where to compute the models. If ``None`` alphas are set automatically." 
- }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." 
- }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - int, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Amount of verbosity." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPUs to use during the cross validation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If positive, restrict regression coefficients to be positive." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator that selects a random feature to update. Used when ``selection`` == 'random'. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- }, - { - "name": "selection", - "type": "Literal['cyclic', 'random']", - "hasDefault": true, - "default": "'cyclic'", - "limitation": null, - "ignored": false, - "docstring": "If set to 'random', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_multitask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Lasso linear model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe best model is selected by cross-validation.\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path.\n\nalphas : ndarray, default=None\n List of alphas where to compute the models.\n If ``None`` alphas are set automatically.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : 'auto', bool or array-like of shape (n_features, n_features), default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nverbose : bool or int, default=False\n Amount of verbosity.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\npositive : bool, default=False\n If positive, restrict regression coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nalpha_ : float\n The amount of penalization chosen by cross validation.\n\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds)\n Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,)\n The grid of alphas used for fitting.\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n The dual gap at the end of the optimization for the optimal alpha\n (``alpha_``).\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\nExamples\n--------\n>>> from sklearn.linear_model import LassoCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(noise=4, random_state=0)\n>>> reg = LassoCV(cv=5, random_state=0).fit(X, y)\n>>> reg.score(X, y)\n0.9993...\n>>> reg.predict(X[:1,])\narray([-78.4951...])\n\nNotes\n-----\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_model_selection.py\n`.\n\nTo avoid unnecessary memory duplication the X argument of the fit 
method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nSee Also\n--------\nlars_path\nlasso_path\nLassoLars\nLasso\nLassoLarsCV" - }, - { - "name": "ElasticNetCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "l1_ratio", - "type": "Union[List[float], float]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "float between 0 and 1 passed to ElasticNet (scaling between l1 and l2 penalties). For ``l1_ratio = 0`` the penalty is an L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2 This parameter can be a list, in which case the different values are tested by cross-validation and the one giving the best prediction score is used. Note that a good choice of list of values for l1_ratio is often to put more values close to 1 (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7, .9, .95, .99, 1]``." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Length of the path. ``eps=1e-3`` means that ``alpha_min / alpha_max = 1e-3``." - }, - { - "name": "n_alphas", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of alphas along the regularization path, used for each l1_ratio." - }, - { - "name": "alphas", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of alphas where to compute the models. If None alphas are set automatically." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. 
data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - int, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." 
- }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Amount of verbosity." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPUs to use during the cross validation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, forces the coefficients to be positive." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator that selects a random feature to update. Used when ``selection`` == 'random'. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "selection", - "type": "Literal['cyclic', 'random']", - "hasDefault": true, - "default": "'cyclic'", - "limitation": null, - "ignored": false, - "docstring": "If set to 'random', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_multitask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Elastic Net model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nl1_ratio : float or list of float, default=0.5\n float between 0 and 1 passed to ElasticNet (scaling between\n l1 and l2 penalties). For ``l1_ratio = 0``\n the penalty is an L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty.\n For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2\n This parameter can be a list, in which case the different\n values are tested by cross-validation and the one giving the best\n prediction score is used. Note that a good choice of list of\n values for l1_ratio is often to put more values close to 1\n (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n .9, .95, .99, 1]``.\n\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path, used for each l1_ratio.\n\nalphas : ndarray, default=None\n List of alphas where to compute the models.\n If None alphas are set automatically.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : 'auto', bool or array-like of shape (n_features, n_features), default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nverbose : bool or int, default=0\n Amount of verbosity.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\npositive : bool, default=False\n When set to ``True``, forces the coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nalpha_ : float\n The amount of penalization chosen by cross validation.\n\nl1_ratio_ : float\n The compromise between l1 and l2 penalization chosen by\n cross validation.\n\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\nintercept_ : float or ndarray of shape (n_targets, n_features)\n Independent term in the decision function.\n\nmse_path_ : ndarray of shape (n_l1_ratio, n_alpha, n_folds)\n Mean square error for the test set on each fold, varying l1_ratio and\n alpha.\n\nalphas_ : ndarray of shape (n_alphas,) or (n_l1_ratio, n_alphas)\n The grid of alphas used for fitting, for each l1_ratio.\n\ndual_gap_ : float\n The dual gaps at the end of the optimization for the optimal alpha.\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\nExamples\n--------\n>>> from sklearn.linear_model import ElasticNetCV\n>>> from sklearn.datasets import make_regression\n\n>>> X, y = make_regression(n_features=2, random_state=0)\n>>> regr = ElasticNetCV(cv=5, random_state=0)\n>>> regr.fit(X, y)\nElasticNetCV(cv=5, random_state=0)\n>>> print(regr.alpha_)\n0.199...\n>>> 
print(regr.intercept_)\n0.398...\n>>> print(regr.predict([[0, 0]]))\n[0.398...]\n\n\nNotes\n-----\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_model_selection.py\n`.\n\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nThe parameter l1_ratio corresponds to alpha in the glmnet R package\nwhile alpha corresponds to the lambda parameter in glmnet.\nMore specifically, the optimization objective is::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\n a * L1 + b * L2\n\nfor::\n\n alpha = a + b and l1_ratio = a / (a + b).\n\nSee Also\n--------\nenet_path\nElasticNet" - }, - { - "name": "MultiTaskElasticNet", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the L1/L2 term. Defaults to 1.0." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The ElasticNet mixing parameter, with 0 < l1_ratio <= 1. For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it is an L2 penalty. For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." 
- }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator that selects a random feature to update. Used when ``selection`` == 'random'. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- }, - { - "name": "selection", - "type": "Literal['cyclic', 'random']", - "hasDefault": true, - "default": "'cyclic'", - "limitation": null, - "ignored": false, - "docstring": "If set to 'random', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target. Will be cast to X's dtype if necessary." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit MultiTaskElasticNet model with coordinate descent\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data.\ny : ndarray of shape (n_samples, n_tasks)\n Target. Will be cast to X's dtype if necessary.\n\nNotes\n-----\n\nCoordinate descent is an algorithm that considers each column of\ndata at a time hence it will automatically convert the X input\nas a Fortran-contiguous numpy array if necessary.\n\nTo avoid memory re-allocation it is advised to allocate the\ninitial data in memory directly using that format." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Multi-task ElasticNet model trained with L1/L2 mixed-norm as\nregularizer.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = sum_i sqrt(sum_j W_ij ^ 2)\n\ni.e. the sum of norms of each row.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\nl1_ratio : float, default=0.5\n The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it\n is an L2 penalty.\n For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_tasks,)\n Independent term in decision function.\n\ncoef_ : ndarray of shape (n_tasks, n_features)\n Parameter vector (W in the cost function formula). 
If a 1D y is\n passed in at fit (non multi-task usage), ``coef_`` is then a 1D array.\n Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\ndual_gap_ : float\n The dual gaps at the end of the optimization.\n\neps_ : float\n The tolerance scaled scaled by the variance of the target `y`.\n\nsparse_coef_ : sparse matrix of shape (n_features,) or (n_tasks, n_features)\n Sparse representation of the `coef_`.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskElasticNet(alpha=0.1)\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])\nMultiTaskElasticNet(alpha=0.1)\n>>> print(clf.coef_)\n[[0.45663524 0.45612256]\n [0.45663524 0.45612256]]\n>>> print(clf.intercept_)\n[0.0872422 0.0872422]\n\nSee Also\n--------\nMultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in\n cross-validation.\nElasticNet\nMultiTaskLasso\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays." - }, - { - "name": "MultiTaskLasso", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the L1/L2 term. Defaults to 1.0." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." 
- }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator that selects a random feature to update. Used when ``selection`` == 'random'. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- }, - { - "name": "selection", - "type": "Literal['cyclic', 'random']", - "hasDefault": true, - "default": "'cyclic'", - "limitation": null, - "ignored": false, - "docstring": "If set to 'random', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. 
This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_tasks, n_features)\n Parameter vector (W in the cost function formula).\n Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nintercept_ : ndarray of shape (n_tasks,)\n Independent term in decision function.\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\ndual_gap_ : ndarray of shape (n_alphas,)\n The dual gaps at the end of the optimization for each alpha.\n\neps_ : float\n The tolerance scaled scaled by the variance of the target `y`.\n\nsparse_coef_ : sparse matrix of shape (n_features,) or (n_tasks, n_features)\n Sparse representation of the `coef_`.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskLasso(alpha=0.1)\n>>> clf.fit([[0, 1], [1, 2], [2, 4]], [[0, 0], [1, 1], [2, 3]])\nMultiTaskLasso(alpha=0.1)\n>>> print(clf.coef_)\n[[0. 0.60809415]\n[0. 0.94592424]]\n>>> print(clf.intercept_)\n[-0.41888636 -0.87382323]\n\nSee Also\n--------\nMultiTaskLasso : Multi-task L1/L2 Lasso with built-in cross-validation.\nLasso\nMultiTaskElasticNet\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays." - }, - { - "name": "MultiTaskElasticNetCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "l1_ratio", - "type": "Union[List[float], float]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The ElasticNet mixing parameter, with 0 < l1_ratio <= 1. For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it is an L2 penalty. 
For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2. This parameter can be a list, in which case the different values are tested by cross-validation and the one giving the best prediction score is used. Note that a good choice of list of values for l1_ratio is often to put more values close to 1 (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7, .9, .95, .99, 1]``" - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Length of the path. ``eps=1e-3`` means that ``alpha_min / alpha_max = 1e-3``." - }, - { - "name": "n_alphas", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of alphas along the regularization path." - }, - { - "name": "alphas", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of alphas where to compute the models. If not provided, set automatically." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - int, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Amount of verbosity." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPUs to use during the cross validation. Note that this is used only if multiple values for l1_ratio are given. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator that selects a random feature to update. Used when ``selection`` == 'random'. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "selection", - "type": "Literal['cyclic', 'random']", - "hasDefault": true, - "default": "'cyclic'", - "limitation": null, - "ignored": false, - "docstring": "If set to 'random', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_multitask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^Fro_2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15\n\nParameters\n----------\nl1_ratio : float or list of float, default=0.5\n The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n For l1_ratio = 1 the penalty is an L1/L2 penalty. 
For l1_ratio = 0 it\n is an L2 penalty.\n For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n This parameter can be a list, in which case the different\n values are tested by cross-validation and the one giving the best\n prediction score is used. Note that a good choice of list of\n values for l1_ratio is often to put more values close to 1\n (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n .9, .95, .99, 1]``\n\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path.\n\nalphas : array-like, default=None\n List of alphas where to compute the models.\n If not provided, set automatically.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, 
:class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nverbose : bool or int, default=0\n Amount of verbosity.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation. Note that this is\n used only if multiple values for l1_ratio are given.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. 
This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_tasks,)\n Independent term in decision function.\n\ncoef_ : ndarray of shape (n_tasks, n_features)\n Parameter vector (W in the cost function formula).\n Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nalpha_ : float\n The amount of penalization chosen by cross validation.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds) or (n_l1_ratio, n_alphas, n_folds)\n Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,) or (n_l1_ratio, n_alphas)\n The grid of alphas used for fitting, for each l1_ratio.\n\nl1_ratio_ : float\n Best l1_ratio obtained by cross-validation.\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\ndual_gap_ : float\n The dual gap at the end of the optimization for the optimal alpha.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskElasticNetCV(cv=3)\n>>> clf.fit([[0,0], [1, 1], [2, 2]],\n... [[0, 0], [1, 1], [2, 2]])\nMultiTaskElasticNetCV(cv=3)\n>>> print(clf.coef_)\n[[0.52875032 0.46958558]\n [0.52875032 0.46958558]]\n>>> print(clf.intercept_)\n[0.00166409 0.00166409]\n\nSee Also\n--------\nMultiTaskElasticNet\nElasticNetCV\nMultiTaskLassoCV\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays." - }, - { - "name": "MultiTaskLassoCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Length of the path. 
``eps=1e-3`` means that ``alpha_min / alpha_max = 1e-3``." - }, - { - "name": "n_alphas", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of alphas along the regularization path." - }, - { - "name": "alphas", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of alphas where to compute the models. If not provided, set automatically." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``." 
- }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - int, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Amount of verbosity." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPUs to use during the cross validation. Note that this is used only if multiple values for l1_ratio are given. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator that selects a random feature to update. Used when ``selection`` == 'random'. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- }, - { - "name": "selection", - "type": "Literal['cyclic', 'random']", - "hasDefault": true, - "default": "'cyclic'", - "limitation": null, - "ignored": false, - "docstring": "If set to 'random', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_multitask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskLasso is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^Fro_2 + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15\n\nParameters\n----------\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path.\n\nalphas : array-like, default=None\n List of alphas where to compute the models.\n If not provided, set automatically.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nverbose : bool or int, default=False\n Amount of verbosity.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation. Note that this is\n used only if multiple values for l1_ratio are given.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. 
Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_tasks,)\n Independent term in decision function.\n\ncoef_ : ndarray of shape (n_tasks, n_features)\n Parameter vector (W in the cost function formula).\n Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nalpha_ : float\n The amount of penalization chosen by cross validation.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds)\n Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,)\n The grid of alphas used for fitting.\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\ndual_gap_ : float\n The dual gap at the end of the optimization for the optimal alpha.\n\nExamples\n--------\n>>> from sklearn.linear_model import MultiTaskLassoCV\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.metrics import r2_score\n>>> X, y = make_regression(n_targets=2, noise=4, random_state=0)\n>>> reg = MultiTaskLassoCV(cv=5, random_state=0).fit(X, y)\n>>> r2_score(y, reg.predict(X))\n0.9994...\n>>> reg.alpha_\n0.5713...\n>>> reg.predict(X[:1,])\narray([[153.7971..., 94.9015...]])\n\nSee Also\n--------\nMultiTaskElasticNet\nElasticNetCV\nMultiTaskElasticNetCV\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays." 
- } - ], - "functions": [ - { - "name": "_set_order", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "order", - "type": "Optional[Literal['C', 'F']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If 'C', dense arrays are returned as C-ordered, sparse matrices in csr format. If 'F', dense arrays are return as F-ordered, sparse matrices in csc format." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Change the order of X and y if necessary.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Target values.\n\norder : {None, 'C', 'F'}\n If 'C', dense arrays are returned as C-ordered, sparse matrices in csr\n format. If 'F', dense arrays are return as F-ordered, sparse matrices\n in csc format.\n\nReturns\n-------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data with guaranteed order.\n\ny : ndarray of shape (n_samples,)\n Target values with guaranteed order." - }, - { - "name": "_alpha_grid", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. 
Pass directly as Fortran-contiguous data to avoid unnecessary memory duplication" - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "Xy", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Xy = np.dot(X.T, y) that can be precomputed." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The elastic net mixing parameter, with ``0 < l1_ratio <= 1``. For ``l1_ratio = 0`` the penalty is an L2 penalty. (currently not supported) ``For l1_ratio = 1`` it is an L1 penalty. For ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Length of the path. ``eps=1e-3`` means that ``alpha_min / alpha_max = 1e-3``" - }, - { - "name": "n_alphas", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of alphas along the regularization path" - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to fit an intercept or not" - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." 
- }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the grid of alpha values for elastic net parameter search\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data to avoid\n unnecessary memory duplication\n\ny : ndarray of shape (n_samples,)\n Target values\n\nXy : array-like of shape (n_features,), default=None\n Xy = np.dot(X.T, y) that can be precomputed.\n\nl1_ratio : float, default=1.0\n The elastic net mixing parameter, with ``0 < l1_ratio <= 1``.\n For ``l1_ratio = 0`` the penalty is an L2 penalty. (currently not\n supported) ``For l1_ratio = 1`` it is an L1 penalty. For\n ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2.\n\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``\n\nn_alphas : int, default=100\n Number of alphas along the regularization path\n\nfit_intercept : bool, default=True\n Whether to fit an intercept or not\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "lasso_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. 
Pass directly as Fortran-contiguous data to avoid unnecessary memory duplication. If ``y`` is mono-output then ``X`` can be sparse." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Length of the path. ``eps=1e-3`` means that ``alpha_min / alpha_max = 1e-3``" - }, - { - "name": "n_alphas", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of alphas along the regularization path" - }, - { - "name": "alphas", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of alphas where to compute the models. If ``None`` alphas are set automatically" - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument." - }, - { - "name": "Xy", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Xy = np.dot(X.T, y) that can be precomputed. It is useful only when the Gram matrix is precomputed." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "coef_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial values of the coefficients." 
- }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Amount of verbosity." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "whether to return the number of iterations or not." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If set to True, forces coefficients to be positive. (Only allowed when ``y.ndim == 1``)." - }, - { - "name": "**params", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "keyword arguments passed to the coordinate descent solver." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Lasso path with coordinate descent\n\nThe Lasso optimization function varies for mono and multi-outputs.\n\nFor mono-output tasks it is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nFor multi-output tasks it is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data to avoid\n unnecessary memory duplication. If ``y`` is mono-output then ``X``\n can be sparse.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values\n\neps : float, default=1e-3\n Length of the path. 
``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``\n\nn_alphas : int, default=100\n Number of alphas along the regularization path\n\nalphas : ndarray, default=None\n List of alphas where to compute the models.\n If ``None`` alphas are set automatically\n\nprecompute : 'auto', bool or array-like of shape (n_features, n_features), default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nXy : array-like of shape (n_features,) or (n_features, n_outputs), default=None\n Xy = np.dot(X.T, y) that can be precomputed. It is useful\n only when the Gram matrix is precomputed.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ncoef_init : ndarray of shape (n_features, ), default=None\n The initial values of the coefficients.\n\nverbose : bool or int, default=False\n Amount of verbosity.\n\nreturn_n_iter : bool, default=False\n whether to return the number of iterations or not.\n\npositive : bool, default=False\n If set to True, forces coefficients to be positive.\n (Only allowed when ``y.ndim == 1``).\n\n**params : kwargs\n keyword arguments passed to the coordinate descent solver.\n\nReturns\n-------\nalphas : ndarray of shape (n_alphas,)\n The alphas along the path where models are computed.\n\ncoefs : ndarray of shape (n_features, n_alphas) or (n_outputs, n_features, n_alphas)\n Coefficients along the path.\n\ndual_gaps : ndarray of shape (n_alphas,)\n The dual gaps at the end of the optimization for each alpha.\n\nn_iters : list of int\n The number of iterations taken by the coordinate descent optimizer to\n reach the specified tolerance for each alpha.\n\nNotes\n-----\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_coordinate_descent_path.py\n`.\n\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nNote 
that in certain cases, the Lars solver may be significantly\nfaster to implement this functionality. In particular, linear\ninterpolation can be used to retrieve model coefficients between the\nvalues output by lars_path\n\nExamples\n--------\n\nComparing lasso_path and lars_path with interpolation:\n\n>>> X = np.array([[1, 2, 3.1], [2.3, 5.4, 4.3]]).T\n>>> y = np.array([1, 2, 3.1])\n>>> # Use lasso_path to compute a coefficient path\n>>> _, coef_path, _ = lasso_path(X, y, alphas=[5., 1., .5])\n>>> print(coef_path)\n[[0. 0. 0.46874778]\n [0.2159048 0.4425765 0.23689075]]\n\n>>> # Now use lars_path and 1D linear interpolation to compute the\n>>> # same path\n>>> from sklearn.linear_model import lars_path\n>>> alphas, active, coef_path_lars = lars_path(X, y, method='lasso')\n>>> from scipy import interpolate\n>>> coef_path_continuous = interpolate.interp1d(alphas[::-1],\n... coef_path_lars[:, ::-1])\n>>> print(coef_path_continuous([5., 1., .5]))\n[[0. 0. 0.46915237]\n [0.2159048 0.4425765 0.23668876]]\n\nSee Also\n--------\nlars_path\nLasso\nLassoLars\nLassoCV\nLassoLarsCV\nsklearn.decomposition.sparse_encode" - }, - { - "name": "enet_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. Pass directly as Fortran-contiguous data to avoid unnecessary memory duplication. If ``y`` is mono-output then ``X`` can be sparse." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Number between 0 and 1 passed to elastic net (scaling between l1 and l2 penalties). ``l1_ratio=1`` corresponds to the Lasso." 
- }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Length of the path. ``eps=1e-3`` means that ``alpha_min / alpha_max = 1e-3``." - }, - { - "name": "n_alphas", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of alphas along the regularization path." - }, - { - "name": "alphas", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of alphas where to compute the models. If None alphas are set automatically." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument." - }, - { - "name": "Xy", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Xy = np.dot(X.T, y) that can be precomputed. It is useful only when the Gram matrix is precomputed." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "coef_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial values of the coefficients." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Amount of verbosity." 
- }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the number of iterations or not." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If set to True, forces coefficients to be positive. (Only allowed when ``y.ndim == 1``)." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If set to False, the input validation checks are skipped (including the Gram matrix when provided). It is assumed that they are handled by the caller." - }, - { - "name": "**params", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Keyword arguments passed to the coordinate descent solver." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute elastic net path with coordinate descent.\n\nThe elastic net optimization function varies for mono and multi-outputs.\n\nFor mono-output tasks it is::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nFor multi-output tasks it is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^Fro_2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data to avoid\n unnecessary memory duplication. 
If ``y`` is mono-output then ``X``\n can be sparse.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nl1_ratio : float, default=0.5\n Number between 0 and 1 passed to elastic net (scaling between\n l1 and l2 penalties). ``l1_ratio=1`` corresponds to the Lasso.\n\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path.\n\nalphas : ndarray, default=None\n List of alphas where to compute the models.\n If None alphas are set automatically.\n\nprecompute : 'auto', bool or array-like of shape (n_features, n_features), default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nXy : array-like of shape (n_features,) or (n_features, n_outputs), default=None\n Xy = np.dot(X.T, y) that can be precomputed. It is useful\n only when the Gram matrix is precomputed.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ncoef_init : ndarray of shape (n_features, ), default=None\n The initial values of the coefficients.\n\nverbose : bool or int, default=False\n Amount of verbosity.\n\nreturn_n_iter : bool, default=False\n Whether to return the number of iterations or not.\n\npositive : bool, default=False\n If set to True, forces coefficients to be positive.\n (Only allowed when ``y.ndim == 1``).\n\ncheck_input : bool, default=True\n If set to False, the input validation checks are skipped (including the\n Gram matrix when provided). 
It is assumed that they are handled\n by the caller.\n\n**params : kwargs\n Keyword arguments passed to the coordinate descent solver.\n\nReturns\n-------\nalphas : ndarray of shape (n_alphas,)\n The alphas along the path where models are computed.\n\ncoefs : ndarray of shape (n_features, n_alphas) or (n_outputs, n_features, n_alphas)\n Coefficients along the path.\n\ndual_gaps : ndarray of shape (n_alphas,)\n The dual gaps at the end of the optimization for each alpha.\n\nn_iters : list of int\n The number of iterations taken by the coordinate descent optimizer to\n reach the specified tolerance for each alpha.\n (Is returned when ``return_n_iter`` is set to True).\n\nSee Also\n--------\nMultiTaskElasticNet\nMultiTaskElasticNetCV\nElasticNet\nElasticNetCV\n\nNotes\n-----\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_coordinate_descent_path.py\n`." - }, - { - "name": "_path_residuals", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "train", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The indices of the train set." - }, - { - "name": "test", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The indices of the test set." - }, - { - "name": "path", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Function returning a list of models on the path. See enet_path for an example of signature." 
- }, - { - "name": "path_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the path function." - }, - { - "name": "alphas", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of float that is used for cross-validation. If not provided, computed using 'path'." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "float between 0 and 1 passed to ElasticNet (scaling between l1 and l2 penalties). For ``l1_ratio = 0`` the penalty is an L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2." - }, - { - "name": "X_order", - "type": "Literal['F', 'C']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The order of the arrays expected by the path function to avoid memory copies." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dtype of the arrays expected by the path function to avoid memory copies." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the MSE for the models computed by 'path'.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\ntrain : list of indices\n The indices of the train set.\n\ntest : list of indices\n The indices of the test set.\n\npath : callable\n Function returning a list of models on the path. 
See\n enet_path for an example of signature.\n\npath_params : dictionary\n Parameters passed to the path function.\n\nalphas : array-like, default=None\n Array of float that is used for cross-validation. If not\n provided, computed using 'path'.\n\nl1_ratio : float, default=1\n float between 0 and 1 passed to ElasticNet (scaling between\n l1 and l2 penalties). For ``l1_ratio = 0`` the penalty is an\n L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty. For ``0\n < l1_ratio < 1``, the penalty is a combination of L1 and L2.\n\nX_order : {'F', 'C'}, default=None\n The order of the arrays expected by the path function to\n avoid memory copies.\n\ndtype : a numpy dtype, default=None\n The dtype of the arrays expected by the path function to\n avoid memory copies." - } - ] - }, - { - "name": "sklearn.linear_model._huber", - "imports": [ - "import numpy as np", - "from scipy import optimize", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from _base import LinearModel", - "from utils import axis0_safe_slice", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from utils.extmath import safe_sparse_dot", - "from utils.optimize import _check_optimize_result" - ], - "classes": [ - { - "name": "HuberRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The parameter epsilon controls the number of samples that should be classified as outliers. The smaller the epsilon, the more robust it is to outliers." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations that ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` should run for." 
- }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This is useful if the stored attributes of a previously used model has to be reused. If set to False, then the coefficients will be rewritten for every call to fit. See :term:`the Glossary `." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to fit the intercept. This can be set to False if the data is already centered around the origin." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-05", - "limitation": null, - "ignored": false, - "docstring": "The iteration will stop when ``max{|proj g_i | i = 1, ..., n}`` <= ``tol`` where pg_i is the i-th component of the projected gradient." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector relative to X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weight given to each sample." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\ny : array-like, shape (n_samples,)\n Target vector relative to X.\n\nsample_weight : array-like, shape (n_samples,)\n Weight given to each sample.\n\nReturns\n-------\nself : object" - } - ], - "docstring": "Linear regression model that is robust to outliers.\n\nThe Huber Regressor optimizes the squared loss for the samples where\n``|(y - X'w) / sigma| < epsilon`` and the absolute loss for the samples\nwhere ``|(y - X'w) / sigma| > epsilon``, where w and sigma are parameters\nto be optimized. The parameter sigma makes sure that if y is scaled up\nor down by a certain factor, one does not need to rescale epsilon to\nachieve the same robustness. Note that this does not take into account\nthe fact that the different features of X may be of different scales.\n\nThis makes sure that the loss function is not heavily influenced by the\noutliers while not completely ignoring their effect.\n\nRead more in the :ref:`User Guide `\n\n.. versionadded:: 0.18\n\nParameters\n----------\nepsilon : float, greater than 1.0, default=1.35\n The parameter epsilon controls the number of samples that should be\n classified as outliers. The smaller the epsilon, the more robust it is\n to outliers.\n\nmax_iter : int, default=100\n Maximum number of iterations that\n ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` should run for.\n\nalpha : float, default=0.0001\n Regularization parameter.\n\nwarm_start : bool, default=False\n This is useful if the stored attributes of a previously used model\n has to be reused. 
If set to False, then the coefficients will\n be rewritten for every call to fit.\n See :term:`the Glossary `.\n\nfit_intercept : bool, default=True\n Whether or not to fit the intercept. This can be set to False\n if the data is already centered around the origin.\n\ntol : float, default=1e-05\n The iteration will stop when\n ``max{|proj g_i | i = 1, ..., n}`` <= ``tol``\n where pg_i is the i-th component of the projected gradient.\n\nAttributes\n----------\ncoef_ : array, shape (n_features,)\n Features got by optimizing the Huber loss.\n\nintercept_ : float\n Bias.\n\nscale_ : float\n The value by which ``|y - X'w - c|`` is scaled down.\n\nn_iter_ : int\n Number of iterations that\n ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` has run for.\n\n .. versionchanged:: 0.20\n\n In SciPy <= 1.0.0 the number of lbfgs iterations may exceed\n ``max_iter``. ``n_iter_`` will now report at most ``max_iter``.\n\noutliers_ : array, shape (n_samples,)\n A boolean mask which is set to True where the samples are identified\n as outliers.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import HuberRegressor, LinearRegression\n>>> from sklearn.datasets import make_regression\n>>> rng = np.random.RandomState(0)\n>>> X, y, coef = make_regression(\n... n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0)\n>>> X[:4] = rng.uniform(10, 20, (4, 2))\n>>> y[:4] = rng.uniform(10, 20, 4)\n>>> huber = HuberRegressor().fit(X, y)\n>>> huber.score(X, y)\n-7.284...\n>>> huber.predict(X[:1,])\narray([806.7200...])\n>>> linear = LinearRegression().fit(X, y)\n>>> print(\"True coefficients:\", coef)\nTrue coefficients: [20.4923... 34.1698...]\n>>> print(\"Huber coefficients:\", huber.coef_)\nHuber coefficients: [17.7906... 31.0106...]\n>>> print(\"Linear Regression coefficients:\", linear.coef_)\nLinear Regression coefficients: [-1.9221... 7.0226...]\n\nReferences\n----------\n.. [1] Peter J. Huber, Elvezio M. 
Ronchetti, Robust Statistics\n Concomitant scale estimates, pg 172\n.. [2] Art B. Owen (2006), A robust hybrid of lasso and ridge regression.\n https://statweb.stanford.edu/~owen/reports/hhu.pdf" - } - ], - "functions": [ - { - "name": "_huber_loss_and_gradient", - "decorators": [], - "parameters": [ - { - "name": "w", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Feature vector. w[:n_features] gives the coefficients w[-1] gives the scale factor and if the intercept is fit w[-2] gives the intercept factor." - }, - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector." - }, - { - "name": "epsilon", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Robustness of the Huber estimator." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weight assigned to each sample." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the Huber loss and the gradient.\n\nParameters\n----------\nw : ndarray, shape (n_features + 1,) or (n_features + 2,)\n Feature vector.\n w[:n_features] gives the coefficients\n w[-1] gives the scale factor and if the intercept is fit w[-2]\n gives the intercept factor.\n\nX : ndarray of shape (n_samples, n_features)\n Input data.\n\ny : ndarray of shape (n_samples,)\n Target vector.\n\nepsilon : float\n Robustness of the Huber estimator.\n\nalpha : float\n Regularization parameter.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Weight assigned to each sample.\n\nReturns\n-------\nloss : float\n Huber loss.\n\ngradient : ndarray, shape (len(w))\n Returns the derivative of the Huber loss with respect to each\n coefficient, intercept and the scale as a vector." - } - ] - }, - { - "name": "sklearn.linear_model._least_angle", - "imports": [ - "from math import log", - "import sys", - "import warnings", - "import numpy as np", - "from scipy import linalg", - "from scipy import interpolate", - "from scipy.linalg.lapack import get_lapack_funcs", - "from joblib import Parallel", - "from _base import LinearModel", - "from base import RegressorMixin", - "from base import MultiOutputMixin", - "from utils import arrayfuncs", - "from utils import as_float_array", - "from utils import check_random_state", - "from model_selection import check_cv", - "from exceptions import ConvergenceWarning", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed" - ], - "classes": [ - { - "name": "Lars", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. 
If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Sets the verbosity amount." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument." - }, - { - "name": "n_nonzero_coefs", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Target number of non-zero coefficients. Use ``np.inf`` for no limit." - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." 
- }, - { - "name": "fit_path", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True the full path is stored in the ``coef_path_`` attribute. If you compute the solution for a large problem or many targets, setting ``fit_path`` to ``False`` will lead to a speedup, especially with a small alpha." - }, - { - "name": "jitter", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Upper bound on a uniform noise parameter to be added to the `y` values, to satisfy the model's assumption of one-at-a-time computations. Might help with stability. .. versionadded:: 0.23" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for jittering. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `. Ignored if `jitter` is None. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_gram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Auxiliary method to fit the model using X, y as training data" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." 
- }, - { - "name": "Xy", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Xy = np.dot(X.T, y) that can be precomputed. It is useful only when the Gram matrix is precomputed." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\nXy : array-like of shape (n_samples,) or (n_samples, n_targets), default=None\n Xy = np.dot(X.T, y) that can be precomputed. It is useful\n only when the Gram matrix is precomputed.\n\nReturns\n-------\nself : object\n returns an instance of self." - } - ], - "docstring": "Least Angle Regression model a.k.a. LAR\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool, 'auto' or array-like , default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nn_nonzero_coefs : int, default=500\n Target number of non-zero coefficients. 
Use ``np.inf`` for no limit.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nfit_path : bool, default=True\n If True the full path is stored in the ``coef_path_`` attribute.\n If you compute the solution for a large problem or many targets,\n setting ``fit_path`` to ``False`` will lead to a speedup, especially\n with a small alpha.\n\njitter : float, default=None\n Upper bound on a uniform noise parameter to be added to the\n `y` values, to satisfy the model's assumption of\n one-at-a-time computations. Might help with stability.\n\n .. versionadded:: 0.23\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for jittering. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `. Ignored if `jitter` is None.\n\n .. versionadded:: 0.23\n\nAttributes\n----------\nalphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller. If this is a list of array-like, the length of the outer\n list is `n_targets`.\n\nactive_ : list of shape (n_alphas,) or list of such lists\n Indices of active variables at the end of the path.\n If this is a list of list, the length of the outer list is `n_targets`.\n\ncoef_path_ : array-like of shape (n_features, n_alphas + 1) or list of such arrays\n The varying values of the coefficients along the path. It is not\n present if the ``fit_path`` parameter is ``False``. 
If this is a list\n of array-like, the length of the outer list is `n_targets`.\n\ncoef_ : array-like of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the formulation formula).\n\nintercept_ : float or array-like of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : array-like or int\n The number of iterations taken by lars_path to find the\n grid of alphas for each target.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> reg = linear_model.Lars(n_nonzero_coefs=1)\n>>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])\nLars(n_nonzero_coefs=1)\n>>> print(reg.coef_)\n[ 0. -1.11...]\n\nSee Also\n--------\nlars_path, LarsCV\nsklearn.decomposition.sparse_encode" - }, - { - "name": "LassoLars", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the penalty term. Defaults to 1.0. ``alpha = 0`` is equivalent to an ordinary least square, solved by :class:`LinearRegression`. For numerical reasons, using ``alpha = 0`` with the LassoLars object is not advised and you should prefer the LinearRegression object." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Sets the verbosity amount." 
- }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform." - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." - }, - { - "name": "fit_path", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True`` the full path is stored in the ``coef_path_`` attribute. 
If you compute the solution for a large problem or many targets, setting ``fit_path`` to ``False`` will lead to a speedup, especially with a small alpha." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Restrict coefficients to be >= 0. Be aware that you might want to remove fit_intercept which is set True by default. Under the positive restriction the model coefficients will not converge to the ordinary-least-squares solution for small values of alpha. Only coefficients up to the smallest alpha value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso algorithm are typically in congruence with the solution of the coordinate descent Lasso estimator." - }, - { - "name": "jitter", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Upper bound on a uniform noise parameter to be added to the `y` values, to satisfy the model's assumption of one-at-a-time computations. Might help with stability. .. versionadded:: 0.23" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for jittering. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `. Ignored if `jitter` is None. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Lasso model fit with Least Angle Regression a.k.a. Lars\n\nIt is a Linear Model trained with an L1 prior as regularizer.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the penalty term. 
Defaults to 1.0.\n ``alpha = 0`` is equivalent to an ordinary least square, solved\n by :class:`LinearRegression`. For numerical reasons, using\n ``alpha = 0`` with the LassoLars object is not advised and you\n should prefer the LinearRegression object.\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool, 'auto' or array-like, default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nfit_path : bool, default=True\n If ``True`` the full path is stored in the ``coef_path_`` attribute.\n If you compute the solution for a large problem or many targets,\n setting ``fit_path`` to ``False`` will lead to a speedup, especially\n with a small alpha.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0. 
Be aware that you might want to\n remove fit_intercept which is set True by default.\n Under the positive restriction the model coefficients will not converge\n to the ordinary-least-squares solution for small values of alpha.\n Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n algorithm are typically in congruence with the solution of the\n coordinate descent Lasso estimator.\n\njitter : float, default=None\n Upper bound on a uniform noise parameter to be added to the\n `y` values, to satisfy the model's assumption of\n one-at-a-time computations. Might help with stability.\n\n .. versionadded:: 0.23\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for jittering. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `. Ignored if `jitter` is None.\n\n .. versionadded:: 0.23\n\nAttributes\n----------\nalphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller. If this is a list of array-like, the length of the outer\n list is `n_targets`.\n\nactive_ : list of length n_alphas or list of such lists\n Indices of active variables at the end of the path.\n If this is a list of list, the length of the outer list is `n_targets`.\n\ncoef_path_ : array-like of shape (n_features, n_alphas + 1) or list of such arrays\n If a list is passed it's expected to be one of n_targets such arrays.\n The varying values of the coefficients along the path. It is not\n present if the ``fit_path`` parameter is ``False``. 
If this is a list\n of array-like, the length of the outer list is `n_targets`.\n\ncoef_ : array-like of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the formulation formula).\n\nintercept_ : float or array-like of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : array-like or int\n The number of iterations taken by lars_path to find the\n grid of alphas for each target.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> reg = linear_model.LassoLars(alpha=0.01)\n>>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])\nLassoLars(alpha=0.01)\n>>> print(reg.coef_)\n[ 0. -0.963257...]\n\nSee Also\n--------\nlars_path\nlasso_path\nLasso\nLassoCV\nLassoLarsCV\nLassoLarsIC\nsklearn.decomposition.sparse_encode" - }, - { - "name": "LarsCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Sets the verbosity amount." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. 
If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix cannot be passed as argument since we will use only subsets of X." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "max_n_alphas", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of points on the path used to compute the residuals in the cross-validation" - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPUs to use during the cross validation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. 
Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nReturns\n-------\nself : object\n returns an instance of self." - } - ], - "docstring": "Cross-validated Least Angle Regression model.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool, 'auto' or array-like , default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram matrix\n cannot be passed as argument since we will use only subsets of X.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nmax_n_alphas : int, default=1000\n The maximum number of points on the path used to compute the\n residuals in the cross-validation\n\nn_jobs : int or None, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. 
Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nAttributes\n----------\nactive_ : list of length n_alphas or list of such lists\n Indices of active variables at the end of the path.\n If this is a list of lists, the outer list length is `n_targets`.\n\ncoef_ : array-like of shape (n_features,)\n parameter vector (w in the formulation formula)\n\nintercept_ : float\n independent term in decision function\n\ncoef_path_ : array-like of shape (n_features, n_alphas)\n the varying values of the coefficients along the path\n\nalpha_ : float\n the estimated regularization parameter alpha\n\nalphas_ : array-like of shape (n_alphas,)\n the different values of alpha along the path\n\ncv_alphas_ : array-like of shape (n_cv_alphas,)\n all the values of alpha along the path for the different folds\n\nmse_path_ : array-like of shape (n_folds, n_cv_alphas)\n the mean square error on left-out for each fold along the path\n (alpha values given by ``cv_alphas``)\n\nn_iter_ : array-like or int\n the number of iterations run by Lars with the optimal alpha.\n\nExamples\n--------\n>>> from sklearn.linear_model import LarsCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_samples=200, noise=4.0, random_state=0)\n>>> reg = LarsCV(cv=5).fit(X, y)\n>>> reg.score(X, y)\n0.9996...\n>>> reg.alpha_\n0.0254...\n>>> reg.predict(X[:1,])\narray([154.0842...])\n\nSee Also\n--------\nlars_path, LassoLars, LassoLarsCV" - }, - { - "name": "LassoLarsCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to calculate the intercept for this model. 
If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Sets the verbosity amount." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix cannot be passed as argument since we will use only subsets of X." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." 
- }, - { - "name": "max_n_alphas", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of points on the path used to compute the residuals in the cross-validation" - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPUs to use during the cross validation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Restrict coefficients to be >= 0. Be aware that you might want to remove fit_intercept which is set True by default. Under the positive restriction the model coefficients do not converge to the ordinary-least-squares solution for small values of alpha. Only coefficients up to the smallest alpha value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso algorithm are typically in congruence with the solution of the coordinate descent Lasso estimator. As a consequence using LassoLarsCV only makes sense for problems where a sparse solution is expected and/or reached." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Cross-validated Lasso, using the LARS algorithm.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool or 'auto' , default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram matrix\n cannot be passed as argument since we will use only subsets of X.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. 
versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nmax_n_alphas : int, default=1000\n The maximum number of points on the path used to compute the\n residuals in the cross-validation\n\nn_jobs : int or None, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0. Be aware that you might want to\n remove fit_intercept which is set True by default.\n Under the positive restriction the model coefficients do not converge\n to the ordinary-least-squares solution for small values of alpha.\n Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n algorithm are typically in congruence with the solution of the\n coordinate descent Lasso estimator.\n As a consequence using LassoLarsCV only makes sense for problems where\n a sparse solution is expected and/or reached.\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n parameter vector (w in the formulation formula)\n\nintercept_ : float\n independent term in decision function.\n\ncoef_path_ : array-like of shape (n_features, n_alphas)\n the varying values of the coefficients along the path\n\nalpha_ : float\n the estimated regularization parameter alpha\n\nalphas_ : array-like of shape (n_alphas,)\n the different values of alpha along the 
path\n\ncv_alphas_ : array-like of shape (n_cv_alphas,)\n all the values of alpha along the path for the different folds\n\nmse_path_ : array-like of shape (n_folds, n_cv_alphas)\n the mean square error on left-out for each fold along the path\n (alpha values given by ``cv_alphas``)\n\nn_iter_ : array-like or int\n the number of iterations run by Lars with the optimal alpha.\n\nactive_ : list of int\n Indices of active variables at the end of the path.\n\nExamples\n--------\n>>> from sklearn.linear_model import LassoLarsCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(noise=4.0, random_state=0)\n>>> reg = LassoLarsCV(cv=5).fit(X, y)\n>>> reg.score(X, y)\n0.9992...\n>>> reg.alpha_\n0.0484...\n>>> reg.predict(X[:1,])\narray([-77.8723...])\n\nNotes\n-----\n\nThe object solves the same problem as the LassoCV object. However,\nunlike the LassoCV, it find the relevant alphas values by itself.\nIn general, because of this property, it will be more stable.\nHowever, it is more fragile to heavily multicollinear datasets.\n\nIt is more efficient than the LassoCV if only a small number of\nfeatures are selected compared to the total number, for instance if\nthere are very few samples compared to the number of features.\n\nSee Also\n--------\nlars_path, LassoLars, LarsCV, LassoCV" - }, - { - "name": "LassoLarsIC", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "criterion", - "type": "Literal['bic', 'aic']", - "hasDefault": true, - "default": "'aic'", - "limitation": null, - "ignored": false, - "docstring": "The type of criterion to use." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." 
- }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Sets the verbosity amount." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform. Can be used for early stopping." - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." 
- }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Restrict coefficients to be >= 0. Be aware that you might want to remove fit_intercept which is set True by default. Under the positive restriction the model coefficients do not converge to the ordinary-least-squares solution for small values of alpha. Only coefficients up to the smallest alpha value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso algorithm are typically in congruence with the solution of the coordinate descent Lasso estimator. As a consequence using LassoLarsIC only makes sense for problems where a sparse solution is expected and/or reached." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "target values. Will be cast to X's dtype if necessary" - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If provided, this parameter will override the choice of copy_X made at instance creation. If ``True``, X will be copied; else, it may be overwritten." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n training data.\n\ny : array-like of shape (n_samples,)\n target values. 
Will be cast to X's dtype if necessary\n\ncopy_X : bool, default=None\n If provided, this parameter will override the choice\n of copy_X made at instance creation.\n If ``True``, X will be copied; else, it may be overwritten.\n\nReturns\n-------\nself : object\n returns an instance of self." - } - ], - "docstring": "Lasso model fit with Lars using BIC or AIC for model selection\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nAIC is the Akaike information criterion and BIC is the Bayes\nInformation criterion. Such criteria are useful to select the value\nof the regularization parameter by making a trade-off between the\ngoodness of fit and the complexity of the model. A good model should\nexplain well the data while being simple.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncriterion : {'bic' , 'aic'}, default='aic'\n The type of criterion to use.\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool, 'auto' or array-like, default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform. 
Can be used for\n early stopping.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0. Be aware that you might want to\n remove fit_intercept which is set True by default.\n Under the positive restriction the model coefficients do not converge\n to the ordinary-least-squares solution for small values of alpha.\n Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n algorithm are typically in congruence with the solution of the\n coordinate descent Lasso estimator.\n As a consequence using LassoLarsIC only makes sense for problems where\n a sparse solution is expected and/or reached.\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n parameter vector (w in the formulation formula)\n\nintercept_ : float\n independent term in decision function.\n\nalpha_ : float\n the alpha parameter chosen by the information criterion\n\nalphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller. If a list, it will be of length `n_targets`.\n\nn_iter_ : int\n number of iterations run by lars_path to find the grid of\n alphas.\n\ncriterion_ : array-like of shape (n_alphas,)\n The value of the information criteria ('aic', 'bic') across all\n alphas. The alpha which has the smallest information criterion is\n chosen. 
This value is larger by a factor of ``n_samples`` compared to\n Eqns. 2.15 and 2.16 in (Zou et al, 2007).\n\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> reg = linear_model.LassoLarsIC(criterion='bic')\n>>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])\nLassoLarsIC(criterion='bic')\n>>> print(reg.coef_)\n[ 0. -1.11...]\n\nNotes\n-----\nThe estimation of the number of degrees of freedom is given by:\n\n\"On the degrees of freedom of the lasso\"\nHui Zou, Trevor Hastie, and Robert Tibshirani\nAnn. Statist. Volume 35, Number 5 (2007), 2173-2192.\n\nhttps://en.wikipedia.org/wiki/Akaike_information_criterion\nhttps://en.wikipedia.org/wiki/Bayesian_information_criterion\n\nSee Also\n--------\nlars_path, LassoLars, LassoLarsCV" - } - ], - "functions": [ - { - "name": "lars_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Optional[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. Note that if X is None then the Gram matrix must be specified, i.e., cannot be None or False." - }, - { - "name": "y", - "type": "Optional[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input targets." - }, - { - "name": "Xy", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Xy = np.dot(X.T, y) that can be precomputed. It is useful only when the Gram matrix is precomputed." - }, - { - "name": "Gram", - "type": "Optional[Union[Literal['auto'], ArrayLike]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram matrix is precomputed from the given X, if there are more samples than features." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform, set to infinity for no limit." - }, - { - "name": "alpha_min", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Minimum correlation along the path. It corresponds to the regularization parameter alpha parameter in the Lasso." - }, - { - "name": "method", - "type": "Literal['lar', 'lasso']", - "hasDefault": true, - "default": "'lar'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the returned model. Select ``'lar'`` for Least Angle Regression, ``'lasso'`` for the Lasso." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, ``X`` is overwritten." - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - }, - { - "name": "copy_Gram", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, ``Gram`` is overwritten." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls output verbosity." - }, - { - "name": "return_path", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``return_path==True`` returns the entire path, else returns only the last point of the path." 
- }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the number of iterations." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Restrict coefficients to be >= 0. This option is only allowed with method 'lasso'. Note that the model coefficients will not converge to the ordinary-least-squares solution for small values of alpha. Only coefficients up to the smallest alpha value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso algorithm are typically in congruence with the solution of the coordinate descent lasso_path function." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Least Angle Regression or Lasso path using LARS algorithm [1]\n\nThe optimization objective for the case method='lasso' is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nin the case of method='lars', the objective function is only known in\nthe form of an implicit equation (see discussion in [1])\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : None or array-like of shape (n_samples, n_features)\n Input data. Note that if X is None then the Gram matrix must be\n specified, i.e., cannot be None or False.\n\ny : None or array-like of shape (n_samples,)\n Input targets.\n\nXy : array-like of shape (n_samples,) or (n_samples, n_targets), default=None\n Xy = np.dot(X.T, y) that can be precomputed. 
It is useful\n only when the Gram matrix is precomputed.\n\nGram : None, 'auto', array-like of shape (n_features, n_features), default=None\n Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram\n matrix is precomputed from the given X, if there are more samples\n than features.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform, set to infinity for no limit.\n\nalpha_min : float, default=0\n Minimum correlation along the path. It corresponds to the\n regularization parameter alpha parameter in the Lasso.\n\nmethod : {'lar', 'lasso'}, default='lar'\n Specifies the returned model. Select ``'lar'`` for Least Angle\n Regression, ``'lasso'`` for the Lasso.\n\ncopy_X : bool, default=True\n If ``False``, ``X`` is overwritten.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_Gram : bool, default=True\n If ``False``, ``Gram`` is overwritten.\n\nverbose : int, default=0\n Controls output verbosity.\n\nreturn_path : bool, default=True\n If ``return_path==True`` returns the entire path, else returns only the\n last point of the path.\n\nreturn_n_iter : bool, default=False\n Whether to return the number of iterations.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0.\n This option is only allowed with method 'lasso'. Note that the model\n coefficients will not converge to the ordinary-least-squares solution\n for small values of alpha. 
Only coefficients up to the smallest alpha\n value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\n the stepwise Lars-Lasso algorithm are typically in congruence with the\n solution of the coordinate descent lasso_path function.\n\nReturns\n-------\nalphas : array-like of shape (n_alphas + 1,)\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller.\n\nactive : array-like of shape (n_alphas,)\n Indices of active variables at the end of the path.\n\ncoefs : array-like of shape (n_features, n_alphas + 1)\n Coefficients along the path\n\nn_iter : int\n Number of iterations run. Returned only if return_n_iter is set\n to True.\n\nSee Also\n--------\nlars_path_gram\nlasso_path\nlasso_path_gram\nLassoLars\nLars\nLassoLarsCV\nLarsCV\nsklearn.decomposition.sparse_encode\n\nReferences\n----------\n.. [1] \"Least Angle Regression\", Efron et al.\n http://statweb.stanford.edu/~tibs/ftp/lars.pdf\n\n.. [2] `Wikipedia entry on the Least-angle regression\n `_\n\n.. [3] `Wikipedia entry on the Lasso\n `_" - }, - { - "name": "lars_path_gram", - "decorators": [], - "parameters": [ - { - "name": "Xy", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Xy = np.dot(X.T, y)." - }, - { - "name": "Gram", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gram = np.dot(X.T * X)." - }, - { - "name": "n_samples", - "type": "Union[float, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Equivalent size of sample." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform, set to infinity for no limit." - }, - { - "name": "alpha_min", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Minimum correlation along the path. It corresponds to the regularization parameter alpha parameter in the Lasso." - }, - { - "name": "method", - "type": "Literal['lar', 'lasso']", - "hasDefault": true, - "default": "'lar'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the returned model. Select ``'lar'`` for Least Angle Regression, ``'lasso'`` for the Lasso." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, ``X`` is overwritten." - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - }, - { - "name": "copy_Gram", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, ``Gram`` is overwritten." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls output verbosity." - }, - { - "name": "return_path", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``return_path==True`` returns the entire path, else returns only the last point of the path." 
- }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the number of iterations." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Restrict coefficients to be >= 0. This option is only allowed with method 'lasso'. Note that the model coefficients will not converge to the ordinary-least-squares solution for small values of alpha. Only coefficients up to the smallest alpha value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso algorithm are typically in congruence with the solution of the coordinate descent lasso_path function." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "lars_path in the sufficient stats mode [1]\n\nThe optimization objective for the case method='lasso' is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nin the case of method='lars', the objective function is only known in\nthe form of an implicit equation (see discussion in [1])\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nXy : array-like of shape (n_samples,) or (n_samples, n_targets)\n Xy = np.dot(X.T, y).\n\nGram : array-like of shape (n_features, n_features)\n Gram = np.dot(X.T * X).\n\nn_samples : int or float\n Equivalent size of sample.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform, set to infinity for no limit.\n\nalpha_min : float, default=0\n Minimum correlation along the path. It corresponds to the\n regularization parameter alpha parameter in the Lasso.\n\nmethod : {'lar', 'lasso'}, default='lar'\n Specifies the returned model. 
Select ``'lar'`` for Least Angle\n Regression, ``'lasso'`` for the Lasso.\n\ncopy_X : bool, default=True\n If ``False``, ``X`` is overwritten.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_Gram : bool, default=True\n If ``False``, ``Gram`` is overwritten.\n\nverbose : int, default=0\n Controls output verbosity.\n\nreturn_path : bool, default=True\n If ``return_path==True`` returns the entire path, else returns only the\n last point of the path.\n\nreturn_n_iter : bool, default=False\n Whether to return the number of iterations.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0.\n This option is only allowed with method 'lasso'. Note that the model\n coefficients will not converge to the ordinary-least-squares solution\n for small values of alpha. Only coefficients up to the smallest alpha\n value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\n the stepwise Lars-Lasso algorithm are typically in congruence with the\n solution of the coordinate descent lasso_path function.\n\nReturns\n-------\nalphas : array-like of shape (n_alphas + 1,)\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller.\n\nactive : array-like of shape (n_alphas,)\n Indices of active variables at the end of the path.\n\ncoefs : array-like of shape (n_features, n_alphas + 1)\n Coefficients along the path\n\nn_iter : int\n Number of iterations run. 
Returned only if return_n_iter is set\n to True.\n\nSee Also\n--------\nlars_path\nlasso_path\nlasso_path_gram\nLassoLars\nLars\nLassoLarsCV\nLarsCV\nsklearn.decomposition.sparse_encode\n\nReferences\n----------\n.. [1] \"Least Angle Regression\", Efron et al.\n http://statweb.stanford.edu/~tibs/ftp/lars.pdf\n\n.. [2] `Wikipedia entry on the Least-angle regression\n `_\n\n.. [3] `Wikipedia entry on the Lasso\n `_" - }, - { - "name": "_lars_path_solver", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Optional[NDArray]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. Note that if X is None then Gram must be specified, i.e., cannot be None or False." - }, - { - "name": "y", - "type": "Optional[NDArray]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input targets." - }, - { - "name": "Xy", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "`Xy = np.dot(X.T, y)` that can be precomputed. It is useful only when the Gram matrix is precomputed." - }, - { - "name": "Gram", - "type": "Optional[Union[Literal['auto'], ArrayLike]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed Gram matrix `(X' * X)`, if ``'auto'``, the Gram matrix is precomputed from the given X, if there are more samples than features." - }, - { - "name": "n_samples", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Equivalent size of sample. If `None`, it will be `n_samples`." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform, set to infinity for no limit." 
- }, - { - "name": "alpha_min", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Minimum correlation along the path. It corresponds to the regularization parameter alpha parameter in the Lasso." - }, - { - "name": "method", - "type": "Literal['lar', 'lasso']", - "hasDefault": true, - "default": "'lar'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the returned model. Select ``'lar'`` for Least Angle Regression, ``'lasso'`` for the Lasso." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, ``X`` is overwritten." - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - }, - { - "name": "copy_Gram", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, ``Gram`` is overwritten." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls output verbosity." - }, - { - "name": "return_path", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``return_path==True`` returns the entire path, else returns only the last point of the path." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the number of iterations." 
- }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Restrict coefficients to be >= 0. This option is only allowed with method 'lasso'. Note that the model coefficients will not converge to the ordinary-least-squares solution for small values of alpha. Only coefficients up to the smallest alpha value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso algorithm are typically in congruence with the solution of the coordinate descent lasso_path function." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Least Angle Regression or Lasso path using LARS algorithm [1]\n\nThe optimization objective for the case method='lasso' is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nin the case of method='lars', the objective function is only known in\nthe form of an implicit equation (see discussion in [1])\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : None or ndarray of shape (n_samples, n_features)\n Input data. Note that if X is None then Gram must be specified,\n i.e., cannot be None or False.\n\ny : None or ndarray of shape (n_samples,)\n Input targets.\n\nXy : array-like of shape (n_samples,) or (n_samples, n_targets), default=None\n `Xy = np.dot(X.T, y)` that can be precomputed. It is useful\n only when the Gram matrix is precomputed.\n\nGram : None, 'auto' or array-like of shape (n_features, n_features), default=None\n Precomputed Gram matrix `(X' * X)`, if ``'auto'``, the Gram\n matrix is precomputed from the given X, if there are more samples\n than features.\n\nn_samples : int or float, default=None\n Equivalent size of sample. If `None`, it will be `n_samples`.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform, set to infinity for no limit.\n\nalpha_min : float, default=0\n Minimum correlation along the path. 
It corresponds to the\n regularization parameter alpha parameter in the Lasso.\n\nmethod : {'lar', 'lasso'}, default='lar'\n Specifies the returned model. Select ``'lar'`` for Least Angle\n Regression, ``'lasso'`` for the Lasso.\n\ncopy_X : bool, default=True\n If ``False``, ``X`` is overwritten.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_Gram : bool, default=True\n If ``False``, ``Gram`` is overwritten.\n\nverbose : int, default=0\n Controls output verbosity.\n\nreturn_path : bool, default=True\n If ``return_path==True`` returns the entire path, else returns only the\n last point of the path.\n\nreturn_n_iter : bool, default=False\n Whether to return the number of iterations.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0.\n This option is only allowed with method 'lasso'. Note that the model\n coefficients will not converge to the ordinary-least-squares solution\n for small values of alpha. Only coefficients up to the smallest alpha\n value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\n the stepwise Lars-Lasso algorithm are typically in congruence with the\n solution of the coordinate descent lasso_path function.\n\nReturns\n-------\nalphas : array-like of shape (n_alphas + 1,)\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller.\n\nactive : array-like of shape (n_alphas,)\n Indices of active variables at the end of the path.\n\ncoefs : array-like of shape (n_features, n_alphas + 1)\n Coefficients along the path\n\nn_iter : int\n Number of iterations run. 
Returned only if return_n_iter is set\n to True.\n\nSee Also\n--------\nlasso_path\nLassoLars\nLars\nLassoLarsCV\nLarsCV\nsklearn.decomposition.sparse_encode\n\nReferences\n----------\n.. [1] \"Least Angle Regression\", Efron et al.\n http://statweb.stanford.edu/~tibs/ftp/lars.pdf\n\n.. [2] `Wikipedia entry on the Least-angle regression\n `_\n\n.. [3] `Wikipedia entry on the Lasso\n `_" - }, - { - "name": "_check_copy_and_writeable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_lars_path_residues", - "decorators": [], - "parameters": [ - { - "name": "X_train", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to fit the LARS on" - }, - { - "name": "y_train", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to fit LARS on" - }, - { - "name": "X_test", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to compute the residues on" - }, - { - "name": "y_test", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to compute the residues on" - }, - { - "name": "Gram", - "type": "Optional[Union[Literal['auto'], ArrayLike]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram matrix is precomputed from the given X, if there are more samples than features" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether X_train, X_test, y_train and y_test should be copied; if False, they may be overwritten." 
- }, - { - "name": "method", - "type": "Literal['lar', 'lasso']", - "hasDefault": true, - "default": "'lar'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the returned model. Select ``'lar'`` for Least Angle Regression, ``'lasso'`` for the Lasso." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Sets the amount of verbosity" - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Restrict coefficients to be >= 0. Be aware that you might want to remove fit_intercept which is set True by default. See reservations for using this option in combination with method 'lasso' for expected small values of alpha in the doc of LassoLarsCV and LassoLarsIC." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform." 
- }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the residues on left-out data for a full LARS path\n\nParameters\n-----------\nX_train : array-like of shape (n_samples, n_features)\n The data to fit the LARS on\n\ny_train : array-like of shape (n_samples,)\n The target variable to fit LARS on\n\nX_test : array-like of shape (n_samples, n_features)\n The data to compute the residues on\n\ny_test : array-like of shape (n_samples,)\n The target variable to compute the residues on\n\nGram : None, 'auto' or array-like of shape (n_features, n_features), default=None\n Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram\n matrix is precomputed from the given X, if there are more samples\n than features\n\ncopy : bool, default=True\n Whether X_train, X_test, y_train and y_test should be copied;\n if False, they may be overwritten.\n\nmethod : {'lar' , 'lasso'}, default='lar'\n Specifies the returned model. Select ``'lar'`` for Least Angle\n Regression, ``'lasso'`` for the Lasso.\n\nverbose : bool or int, default=False\n Sets the amount of verbosity\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\npositive : bool, default=False\n Restrict coefficients to be >= 0. 
Be aware that you might want to\n remove fit_intercept which is set True by default.\n See reservations for using this option in combination with method\n 'lasso' for expected small values of alpha in the doc of LassoLarsCV\n and LassoLarsIC.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\nReturns\n--------\nalphas : array-like of shape (n_alphas,)\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter`` or ``n_features``, whichever\n is smaller.\n\nactive : list\n Indices of active variables at the end of the path.\n\ncoefs : array-like of shape (n_features, n_alphas)\n Coefficients along the path\n\nresidues : array-like of shape (n_alphas, n_samples)\n Residues of the prediction on the test data" - } - ] - }, - { - "name": "sklearn.linear_model._logistic", - "imports": [ - "import numbers", - "import warnings", - "import numpy as np", - "from scipy import optimize", - "from scipy import sparse", - "from scipy.special import expit", - "from scipy.special import logsumexp", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "from _base import LinearClassifierMixin", - "from _base import SparseCoefMixin", - "from _base import BaseEstimator", - "from _sag import 
sag_solver", - "from preprocessing import LabelEncoder", - "from preprocessing import LabelBinarizer", - "from svm._base import _fit_liblinear", - "from utils import check_array", - "from utils import check_consistent_length", - "from utils import compute_class_weight", - "from utils import check_random_state", - "from utils.extmath import log_logistic", - "from utils.extmath import safe_sparse_dot", - "from utils.extmath import softmax", - "from utils.extmath import squared_norm", - "from utils.extmath import row_norms", - "from utils.optimize import _newton_cg", - "from utils.optimize import _check_optimize_result", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import check_classification_targets", - "from utils.fixes import _joblib_parallel_args", - "from utils.fixes import delayed", - "from model_selection import check_cv", - "from metrics import get_scorer" - ], - "classes": [ - { - "name": "LogisticRegression", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "penalty", - "type": "Literal['l1', 'l2', 'elasticnet', 'none']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "Used to specify the norm used in the penalization. The 'newton-cg', 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is only supported by the 'saga' solver. If 'none' (not supported by the liblinear solver), no regularization is applied. .. versionadded:: 0.19 l1 penalty with SAGA solver (allowing 'multinomial' + L1)" - }, - { - "name": "dual", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Dual or primal formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. 
Prefer dual=False when n_samples > n_features." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criteria." - }, - { - "name": "C", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function." - }, - { - "name": "intercept_scaling", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Useful only when the solver 'liblinear' is used and self.fit_intercept is set to True. In this case, x becomes [x, self.intercept_scaling], i.e. a \"synthetic\" feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes ``intercept_scaling * synthetic_feature_weight``. Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``. 
Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified. .. versionadded:: 0.17 *class_weight='balanced'*" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the data. See :term:`Glossary ` for details." - }, - { - "name": "solver", - "type": "Literal['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Algorithm to use in the optimization problem. - For small datasets, 'liblinear' is a good choice, whereas 'sag' and 'saga' are faster for large ones. - For multiclass problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs' handle multinomial loss; 'liblinear' is limited to one-versus-rest schemes. - 'newton-cg', 'lbfgs', 'sag' and 'saga' handle L2 or no penalty - 'liblinear' and 'saga' also handle L1 penalty - 'saga' also supports 'elasticnet' penalty - 'liblinear' does not support setting ``penalty='none'`` Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. You can preprocess the data with a scaler from sklearn.preprocessing. .. versionadded:: 0.17 Stochastic Average Gradient descent solver. .. versionadded:: 0.19 SAGA solver. .. versionchanged:: 0.22 The default solver changed from 'liblinear' to 'lbfgs' in 0.22." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations taken for the solvers to converge." 
- }, - { - "name": "multi_class", - "type": "Literal['auto', 'ovr', 'multinomial']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If the option chosen is 'ovr', then a binary problem is fit for each label. For 'multinomial' the loss minimised is the multinomial loss fit across the entire probability distribution, *even when the data is binary*. 'multinomial' is unavailable when solver='liblinear'. 'auto' selects 'ovr' if the data is binary, or if solver='liblinear', and otherwise selects 'multinomial'. .. versionadded:: 0.18 Stochastic Average Gradient descent solver for 'multinomial' case. .. versionchanged:: 0.22 Default changed from 'ovr' to 'auto' in 0.22." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "For the liblinear and lbfgs solvers set verbose to any positive number for verbosity." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. Useless for liblinear solver. See :term:`the Glossary `. .. versionadded:: 0.17 *warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPU cores used when parallelizing over classes if multi_class='ovr'\". This parameter is ignored when the ``solver`` is set to 'liblinear' regardless of whether 'multi_class' is specified or not. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector relative to X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight. .. versionadded:: 0.17 *sample_weight* support to LogisticRegression." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X.\n\nsample_weight : array-like of shape (n_samples,) default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\n .. 
versionadded:: 0.17\n *sample_weight* support to LogisticRegression.\n\nReturns\n-------\nself\n Fitted estimator.\n\nNotes\n-----\nThe SAGA solver supports both float64 and float32 bit arrays." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Vector to be scored, where `n_samples` is the number of samples and `n_features` is the number of features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Probability estimates.\n\nThe returned estimates for all classes are ordered by the\nlabel of classes.\n\nFor a multi_class problem, if multi_class is set to be \"multinomial\"\nthe softmax function is used to find the predicted probability of\neach class.\nElse use a one-vs-rest approach, i.e calculate the probability\nof each class assuming it to be positive using the logistic function.\nand normalize these values across all the classes.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Vector to be scored, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\nReturns\n-------\nT : array-like of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in the model,\n where classes are ordered as they are in ``self.classes_``." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Vector to be scored, where `n_samples` is the number of samples and `n_features` is the number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict logarithm of probability estimates.\n\nThe returned estimates for all classes are ordered by the\nlabel of classes.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Vector to be scored, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\nReturns\n-------\nT : array-like of shape (n_samples, n_classes)\n Returns the log-probability of the sample for each class in the\n model, where classes are ordered as they are in ``self.classes_``." - } - ], - "docstring": "Logistic Regression (aka logit, MaxEnt) classifier.\n\nIn the multiclass case, the training algorithm uses the one-vs-rest (OvR)\nscheme if the 'multi_class' option is set to 'ovr', and uses the\ncross-entropy loss if the 'multi_class' option is set to 'multinomial'.\n(Currently the 'multinomial' option is supported only by the 'lbfgs',\n'sag', 'saga' and 'newton-cg' solvers.)\n\nThis class implements regularized logistic regression using the\n'liblinear' library, 'newton-cg', 'sag', 'saga' and 'lbfgs' solvers. **Note\nthat regularization is applied by default**. It can handle both dense\nand sparse input. Use C-ordered arrays or CSR matrices containing 64-bit\nfloats for optimal performance; any other input format will be converted\n(and copied).\n\nThe 'newton-cg', 'sag', and 'lbfgs' solvers support only L2 regularization\nwith primal formulation, or no regularization. The 'liblinear' solver\nsupports both L1 and L2 regularization, with a dual formulation only for\nthe L2 penalty. The Elastic-Net regularization is only supported by the\n'saga' solver.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npenalty : {'l1', 'l2', 'elasticnet', 'none'}, default='l2'\n Used to specify the norm used in the penalization. The 'newton-cg',\n 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n only supported by the 'saga' solver. 
If 'none' (not supported by the\n liblinear solver), no regularization is applied.\n\n .. versionadded:: 0.19\n l1 penalty with SAGA solver (allowing 'multinomial' + L1)\n\ndual : bool, default=False\n Dual or primal formulation. Dual formulation is only implemented for\n l2 penalty with liblinear solver. Prefer dual=False when\n n_samples > n_features.\n\ntol : float, default=1e-4\n Tolerance for stopping criteria.\n\nC : float, default=1.0\n Inverse of regularization strength; must be a positive float.\n Like in support vector machines, smaller values specify stronger\n regularization.\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the decision function.\n\nintercept_scaling : float, default=1\n Useful only when the solver 'liblinear' is used\n and self.fit_intercept is set to True. In this case, x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equal to\n intercept_scaling is appended to the instance vector.\n The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\n .. 
versionadded:: 0.17\n *class_weight='balanced'*\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n data. See :term:`Glossary ` for details.\n\nsolver : {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, default='lbfgs'\n\n Algorithm to use in the optimization problem.\n\n - For small datasets, 'liblinear' is a good choice, whereas 'sag' and\n 'saga' are faster for large ones.\n - For multiclass problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs'\n handle multinomial loss; 'liblinear' is limited to one-versus-rest\n schemes.\n - 'newton-cg', 'lbfgs', 'sag' and 'saga' handle L2 or no penalty\n - 'liblinear' and 'saga' also handle L1 penalty\n - 'saga' also supports 'elasticnet' penalty\n - 'liblinear' does not support setting ``penalty='none'``\n\n Note that 'sag' and 'saga' fast convergence is only guaranteed on\n features with approximately the same scale. You can\n preprocess the data with a scaler from sklearn.preprocessing.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n .. versionchanged:: 0.22\n The default solver changed from 'liblinear' to 'lbfgs' in 0.22.\n\nmax_iter : int, default=100\n Maximum number of iterations taken for the solvers to converge.\n\nmulti_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n If the option chosen is 'ovr', then a binary problem is fit for each\n label. For 'multinomial' the loss minimised is the multinomial loss fit\n across the entire probability distribution, *even when the data is\n binary*. 'multinomial' is unavailable when solver='liblinear'.\n 'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n and otherwise selects 'multinomial'.\n\n .. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n .. 
versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22.\n\nverbose : int, default=0\n For the liblinear and lbfgs solvers set verbose to any positive\n number for verbosity.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n Useless for liblinear solver. See :term:`the Glossary `.\n\n .. versionadded:: 0.17\n *warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers.\n\nn_jobs : int, default=None\n Number of CPU cores used when parallelizing over classes if\n multi_class='ovr'\". This parameter is ignored when the ``solver`` is\n set to 'liblinear' regardless of whether 'multi_class' is specified or\n not. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors.\n See :term:`Glossary ` for more details.\n\nl1_ratio : float, default=None\n The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n combination of L1 and L2.\n\nAttributes\n----------\n\nclasses_ : ndarray of shape (n_classes, )\n A list of class labels known to the classifier.\n\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n Coefficient of the features in the decision function.\n\n `coef_` is of shape (1, n_features) when the given problem is binary.\n In particular, when `multi_class='multinomial'`, `coef_` corresponds\n to outcome 1 (True) and `-coef_` corresponds to outcome 0 (False).\n\nintercept_ : ndarray of shape (1,) or (n_classes,)\n Intercept (a.k.a. 
bias) added to the decision function.\n\n If `fit_intercept` is set to False, the intercept is set to zero.\n `intercept_` is of shape (1,) when the given problem is binary.\n In particular, when `multi_class='multinomial'`, `intercept_`\n corresponds to outcome 1 (True) and `-intercept_` corresponds to\n outcome 0 (False).\n\nn_iter_ : ndarray of shape (n_classes,) or (1, )\n Actual number of iterations for all classes. If binary or multinomial,\n it returns only 1 element. For liblinear solver, only the maximum\n number of iteration across all classes is given.\n\n .. versionchanged:: 0.20\n\n In SciPy <= 1.0.0 the number of lbfgs iterations may exceed\n ``max_iter``. ``n_iter_`` will now report at most ``max_iter``.\n\nSee Also\n--------\nSGDClassifier : Incrementally trained logistic regression (when given\n the parameter ``loss=\"log\"``).\nLogisticRegressionCV : Logistic regression with built-in cross validation.\n\nNotes\n-----\nThe underlying C implementation uses a random number generator to\nselect features when fitting the model. It is thus not uncommon,\nto have slightly different results for the same input data. If\nthat happens, try with a smaller tol parameter.\n\nPredict output may not match that of standalone liblinear in certain\ncases. See :ref:`differences from liblinear `\nin the narrative documentation.\n\nReferences\n----------\n\nL-BFGS-B -- Software for Large-scale Bound-constrained Optimization\n Ciyou Zhu, Richard Byrd, Jorge Nocedal and Jose Luis Morales.\n http://users.iems.northwestern.edu/~nocedal/lbfgsb.html\n\nLIBLINEAR -- A Library for Large Linear Classification\n https://www.csie.ntu.edu.tw/~cjlin/liblinear/\n\nSAG -- Mark Schmidt, Nicolas Le Roux, and Francis Bach\n Minimizing Finite Sums with the Stochastic Average Gradient\n https://hal.inria.fr/hal-00860051/document\n\nSAGA -- Defazio, A., Bach F. & Lacoste-Julien S. 
(2014).\n SAGA: A Fast Incremental Gradient Method With Support\n for Non-Strongly Convex Composite Objectives\n https://arxiv.org/abs/1407.0202\n\nHsiang-Fu Yu, Fang-Lan Huang, Chih-Jen Lin (2011). Dual coordinate descent\n methods for logistic regression and maximum entropy models.\n Machine Learning 85(1-2):41-75.\n https://www.csie.ntu.edu.tw/~cjlin/papers/maxent_dual.pdf\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.linear_model import LogisticRegression\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = LogisticRegression(random_state=0).fit(X, y)\n>>> clf.predict(X[:2, :])\narray([0, 0])\n>>> clf.predict_proba(X[:2, :])\narray([[9.8...e-01, 1.8...e-02, 1.4...e-08],\n [9.7...e-01, 2.8...e-02, ...e-08]])\n>>> clf.score(X, y)\n0.97..." - }, - { - "name": "LogisticRegressionCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "Cs", - "type": "Union[List, int]", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Each of the values in Cs describes the inverse of regularization strength. If Cs is as an int, then a grid of Cs values are chosen in a logarithmic scale between 1e-4 and 1e4. Like in support vector machines, smaller values specify stronger regularization." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The default cross-validation generator used is Stratified K-Folds. If an integer is provided, then it is the number of folds used. See the module :mod:`sklearn.model_selection` module for the list of possible cross-validation objects. .. 
versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "dual", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Dual or primal formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. Prefer dual=False when n_samples > n_features." - }, - { - "name": "penalty", - "type": "Literal['l1', 'l2', 'elasticnet']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "Used to specify the norm used in the penalization. The 'newton-cg', 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is only supported by the 'saga' solver." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. For a list of scoring functions that can be used, look at :mod:`sklearn.metrics`. The default scoring option used is 'accuracy'." - }, - { - "name": "solver", - "type": "Literal['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Algorithm to use in the optimization problem. - For small datasets, 'liblinear' is a good choice, whereas 'sag' and 'saga' are faster for large ones. - For multiclass problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs' handle multinomial loss; 'liblinear' is limited to one-versus-rest schemes. - 'newton-cg', 'lbfgs' and 'sag' only handle L2 penalty, whereas 'liblinear' and 'saga' handle L1 penalty. - 'liblinear' might be slower in LogisticRegressionCV because it does not handle warm-starting. Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. 
You can preprocess the data with a scaler from sklearn.preprocessing. .. versionadded:: 0.17 Stochastic Average Gradient descent solver. .. versionadded:: 0.19 SAGA solver." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criteria." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations of the optimization algorithm." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified. .. versionadded:: 0.17 class_weight == 'balanced'" - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPU cores used during the cross-validation loop. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "For the 'liblinear', 'sag' and 'lbfgs' solvers set verbose to any positive number for verbosity." 
- }, - { - "name": "refit", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If set to True, the scores are averaged across all folds, and the coefs and the C that corresponds to the best score is taken, and a final refit is done using these parameters. Otherwise the coefs, intercepts and C that correspond to the best scores across folds are averaged." - }, - { - "name": "intercept_scaling", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Useful only when the solver 'liblinear' is used and self.fit_intercept is set to True. In this case, x becomes [x, self.intercept_scaling], i.e. a \"synthetic\" feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes ``intercept_scaling * synthetic_feature_weight``. Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased." - }, - { - "name": "multi_class", - "type": "Literal['ovr', 'multinomial']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If the option chosen is 'ovr', then a binary problem is fit for each label. For 'multinomial' the loss minimised is the multinomial loss fit across the entire probability distribution, *even when the data is binary*. 'multinomial' is unavailable when solver='liblinear'. 'auto' selects 'ovr' if the data is binary, or if solver='liblinear', and otherwise selects 'multinomial'. .. versionadded:: 0.18 Stochastic Average Gradient descent solver for 'multinomial' case. .. versionchanged:: 0.22 Default changed from 'ovr' to 'auto' in 0.22." 
- }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when `solver='sag'`, 'saga' or 'liblinear' to shuffle the data. Note that this only applies to the solver and not the cross-validation generator. See :term:`Glossary ` for details." - }, - { - "name": "l1_ratios", - "type": "List[float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The list of Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only used if ``penalty='elasticnet'``. A value of 0 is equivalent to using ``penalty='l2'``, while 1 is equivalent to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector relative to X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X.\n\nsample_weight : array-like of shape (n_samples,) default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nReturns\n-------\nself : object" - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels for X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the score using the `scoring` option on the given\ntest data and labels.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples.\n\ny : array-like of shape (n_samples,)\n True labels for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Score of self.predict(X) wrt. y." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Logistic Regression CV (aka logit, MaxEnt) classifier.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThis class implements logistic regression using liblinear, newton-cg, sag\nof lbfgs optimizer. The newton-cg, sag and lbfgs solvers support only L2\nregularization with primal formulation. The liblinear solver supports both\nL1 and L2 regularization, with a dual formulation only for the L2 penalty.\nElastic-Net penalty is only supported by the saga solver.\n\nFor the grid of `Cs` values and `l1_ratios` values, the best hyperparameter\nis selected by the cross-validator\n:class:`~sklearn.model_selection.StratifiedKFold`, but it can be changed\nusing the :term:`cv` parameter. The 'newton-cg', 'sag', 'saga' and 'lbfgs'\nsolvers can warm-start the coefficients (see :term:`Glossary`).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nCs : int or list of floats, default=10\n Each of the values in Cs describes the inverse of regularization\n strength. If Cs is as an int, then a grid of Cs values are chosen\n in a logarithmic scale between 1e-4 and 1e4.\n Like in support vector machines, smaller values specify stronger\n regularization.\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the decision function.\n\ncv : int or cross-validation generator, default=None\n The default cross-validation generator used is Stratified K-Folds.\n If an integer is provided, then it is the number of folds used.\n See the module :mod:`sklearn.model_selection` module for the\n list of possible cross-validation objects.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\ndual : bool, default=False\n Dual or primal formulation. 
Dual formulation is only implemented for\n l2 penalty with liblinear solver. Prefer dual=False when\n n_samples > n_features.\n\npenalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n Used to specify the norm used in the penalization. The 'newton-cg',\n 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n only supported by the 'saga' solver.\n\nscoring : str or callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``. For a list of scoring functions\n that can be used, look at :mod:`sklearn.metrics`. The\n default scoring option used is 'accuracy'.\n\nsolver : {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, default='lbfgs'\n\n Algorithm to use in the optimization problem.\n\n - For small datasets, 'liblinear' is a good choice, whereas 'sag' and\n 'saga' are faster for large ones.\n - For multiclass problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs'\n handle multinomial loss; 'liblinear' is limited to one-versus-rest\n schemes.\n - 'newton-cg', 'lbfgs' and 'sag' only handle L2 penalty, whereas\n 'liblinear' and 'saga' handle L1 penalty.\n - 'liblinear' might be slower in LogisticRegressionCV because it does\n not handle warm-starting.\n\n Note that 'sag' and 'saga' fast convergence is only guaranteed on\n features with approximately the same scale. You can preprocess the data\n with a scaler from sklearn.preprocessing.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. 
versionadded:: 0.19\n SAGA solver.\n\ntol : float, default=1e-4\n Tolerance for stopping criteria.\n\nmax_iter : int, default=100\n Maximum number of iterations of the optimization algorithm.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\n .. versionadded:: 0.17\n class_weight == 'balanced'\n\nn_jobs : int, default=None\n Number of CPU cores used during the cross-validation loop.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n For the 'liblinear', 'sag' and 'lbfgs' solvers set verbose to any\n positive number for verbosity.\n\nrefit : bool, default=True\n If set to True, the scores are averaged across all folds, and the\n coefs and the C that corresponds to the best score is taken, and a\n final refit is done using these parameters.\n Otherwise the coefs, intercepts and C that correspond to the\n best scores across folds are averaged.\n\nintercept_scaling : float, default=1\n Useful only when the solver 'liblinear' is used\n and self.fit_intercept is set to True. In this case, x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equal to\n intercept_scaling is appended to the instance vector.\n The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n Note! 
the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\nmulti_class : {'auto, 'ovr', 'multinomial'}, default='auto'\n If the option chosen is 'ovr', then a binary problem is fit for each\n label. For 'multinomial' the loss minimised is the multinomial loss fit\n across the entire probability distribution, *even when the data is\n binary*. 'multinomial' is unavailable when solver='liblinear'.\n 'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n and otherwise selects 'multinomial'.\n\n .. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n .. versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22.\n\nrandom_state : int, RandomState instance, default=None\n Used when `solver='sag'`, 'saga' or 'liblinear' to shuffle the data.\n Note that this only applies to the solver and not the cross-validation\n generator. See :term:`Glossary ` for details.\n\nl1_ratios : list of float, default=None\n The list of Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``.\n Only used if ``penalty='elasticnet'``. A value of 0 is equivalent to\n using ``penalty='l2'``, while 1 is equivalent to using\n ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a combination\n of L1 and L2.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes, )\n A list of class labels known to the classifier.\n\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n Coefficient of the features in the decision function.\n\n `coef_` is of shape (1, n_features) when the given problem\n is binary.\n\nintercept_ : ndarray of shape (1,) or (n_classes,)\n Intercept (a.k.a. 
bias) added to the decision function.\n\n If `fit_intercept` is set to False, the intercept is set to zero.\n `intercept_` is of shape(1,) when the problem is binary.\n\nCs_ : ndarray of shape (n_cs)\n Array of C i.e. inverse of regularization parameter values used\n for cross-validation.\n\nl1_ratios_ : ndarray of shape (n_l1_ratios)\n Array of l1_ratios used for cross-validation. If no l1_ratio is used\n (i.e. penalty is not 'elasticnet'), this is set to ``[None]``\n\ncoefs_paths_ : ndarray of shape (n_folds, n_cs, n_features) or (n_folds, n_cs, n_features + 1)\n dict with classes as the keys, and the path of coefficients obtained\n during cross-validating across each fold and then across each Cs\n after doing an OvR for the corresponding class as values.\n If the 'multi_class' option is set to 'multinomial', then\n the coefs_paths are the coefficients corresponding to each class.\n Each dict value has shape ``(n_folds, n_cs, n_features)`` or\n ``(n_folds, n_cs, n_features + 1)`` depending on whether the\n intercept is fit or not. If ``penalty='elasticnet'``, the shape is\n ``(n_folds, n_cs, n_l1_ratios_, n_features)`` or\n ``(n_folds, n_cs, n_l1_ratios_, n_features + 1)``.\n\nscores_ : dict\n dict with classes as the keys, and the values as the\n grid of scores obtained during cross-validating each fold, after doing\n an OvR for the corresponding class. If the 'multi_class' option\n given is 'multinomial' then the same scores are repeated across\n all classes, since this is the multinomial class. Each dict value\n has shape ``(n_folds, n_cs`` or ``(n_folds, n_cs, n_l1_ratios)`` if\n ``penalty='elasticnet'``.\n\nC_ : ndarray of shape (n_classes,) or (n_classes - 1,)\n Array of C that maps to the best scores across every class. 
If refit is\n set to False, then for each class, the best C is the average of the\n C's that correspond to the best scores for each fold.\n `C_` is of shape(n_classes,) when the problem is binary.\n\nl1_ratio_ : ndarray of shape (n_classes,) or (n_classes - 1,)\n Array of l1_ratio that maps to the best scores across every class. If\n refit is set to False, then for each class, the best l1_ratio is the\n average of the l1_ratio's that correspond to the best scores for each\n fold. `l1_ratio_` is of shape(n_classes,) when the problem is binary.\n\nn_iter_ : ndarray of shape (n_classes, n_folds, n_cs) or (1, n_folds, n_cs)\n Actual number of iterations for all classes, folds and Cs.\n In the binary or multinomial cases, the first dimension is equal to 1.\n If ``penalty='elasticnet'``, the shape is ``(n_classes, n_folds,\n n_cs, n_l1_ratios)`` or ``(1, n_folds, n_cs, n_l1_ratios)``.\n\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.linear_model import LogisticRegressionCV\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = LogisticRegressionCV(cv=5, random_state=0).fit(X, y)\n>>> clf.predict(X[:2, :])\narray([0, 0])\n>>> clf.predict_proba(X[:2, :]).shape\n(2, 3)\n>>> clf.score(X, y)\n0.98...\n\nSee Also\n--------\nLogisticRegression" - } - ], - "functions": [ - { - "name": "_intercept_dot", - "decorators": [], - "parameters": [ - { - "name": "w", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Coefficient vector." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of labels." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes y * np.dot(X, w).\n\nIt takes into consideration if the intercept should be fit or not.\n\nParameters\n----------\nw : ndarray of shape (n_features,) or (n_features + 1,)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Array of labels.\n\nReturns\n-------\nw : ndarray of shape (n_features,)\n Coefficient vector without the intercept weight (w[-1]) if the\n intercept should be fit. Unchanged otherwise.\n\nc : float\n The intercept.\n\nyz : float\n y * np.dot(X, w)." - }, - { - "name": "_logistic_loss_and_grad", - "decorators": [], - "parameters": [ - { - "name": "w", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Coefficient vector." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of labels." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter. alpha is equal to 1 / C." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the logistic loss and gradient.\n\nParameters\n----------\nw : ndarray of shape (n_features,) or (n_features + 1,)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Array of labels.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nReturns\n-------\nout : float\n Logistic loss.\n\ngrad : ndarray of shape (n_features,) or (n_features + 1,)\n Logistic gradient." - }, - { - "name": "_logistic_loss", - "decorators": [], - "parameters": [ - { - "name": "w", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Coefficient vector." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of labels." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter. alpha is equal to 1 / C." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the logistic loss.\n\nParameters\n----------\nw : ndarray of shape (n_features,) or (n_features + 1,)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Array of labels.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,) default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nReturns\n-------\nout : float\n Logistic loss." - }, - { - "name": "_logistic_grad_hess", - "decorators": [], - "parameters": [ - { - "name": "w", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Coefficient vector." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of labels." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter. alpha is equal to 1 / C." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the gradient and the Hessian, in the case of a logistic loss.\n\nParameters\n----------\nw : ndarray of shape (n_features,) or (n_features + 1,)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Array of labels.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,) default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nReturns\n-------\ngrad : ndarray of shape (n_features,) or (n_features + 1,)\n Logistic gradient.\n\nHs : callable\n Function that takes the gradient as a parameter and returns the\n matrix product of the Hessian and gradient." - }, - { - "name": "_multinomial_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes multinomial loss and class probabilities.\n\nParameters\n----------\nw : ndarray of shape (n_classes * n_features,) or\n (n_classes * (n_features + 1),)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\nY : ndarray of shape (n_samples, n_classes)\n Transformed labels according to the output of LabelBinarizer.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,)\n Array of weights that are assigned to individual samples.\n\nReturns\n-------\nloss : float\n Multinomial loss.\n\np : ndarray of shape (n_samples, n_classes)\n Estimated class probabilities.\n\nw : ndarray of shape (n_classes, n_features)\n Reshaped param vector excluding intercept terms.\n\nReference\n---------\nBishop, C. M. (2006). Pattern recognition and machine learning.\nSpringer. 
(Chapter 4.3.4)" - }, - { - "name": "_multinomial_loss_grad", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the multinomial loss, gradient and class probabilities.\n\nParameters\n----------\nw : ndarray of shape (n_classes * n_features,) or\n (n_classes * (n_features + 1),)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\nY : ndarray of shape (n_samples, n_classes)\n Transformed labels according to the output of LabelBinarizer.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,)\n Array of weights that are assigned to individual samples.\n\nReturns\n-------\nloss : float\n Multinomial loss.\n\ngrad : ndarray of shape (n_classes * n_features,) or (n_classes * (n_features + 1),)\n Ravelled gradient of the multinomial loss.\n\np : ndarray of shape (n_samples, n_classes)\n Estimated class probabilities\n\nReference\n---------\nBishop, C. M. (2006). Pattern recognition and machine learning.\nSpringer. (Chapter 4.3.4)" - }, - { - "name": "_multinomial_grad_hess", - "decorators": [], - "parameters": [ - { - "name": "w", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "(n_classes * (n_features + 1),) Coefficient vector." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Transformed labels according to the output of LabelBinarizer." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter. alpha is equal to 1 / C." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the gradient and the Hessian, in the case of a multinomial loss.\n\nParameters\n----------\nw : ndarray of shape (n_classes * n_features,) or\n (n_classes * (n_features + 1),)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\nY : ndarray of shape (n_samples, n_classes)\n Transformed labels according to the output of LabelBinarizer.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,)\n Array of weights that are assigned to individual samples.\n\nReturns\n-------\ngrad : ndarray of shape (n_classes * n_features,) or (n_classes * (n_features + 1),)\n Ravelled gradient of the multinomial loss.\n\nhessp : callable\n Function that takes in a vector input of shape (n_classes * n_features)\n or (n_classes * (n_features + 1)) and returns matrix-vector product\n with hessian.\n\nReferences\n----------\nBarak A. Pearlmutter (1993). Fast Exact Multiplication by the Hessian.\n http://www.bcl.hamilton.ie/~barak/papers/nc-hessian.pdf" - }, - { - "name": "_check_solver", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_multi_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_logistic_regression_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, target values." - }, - { - "name": "pos_class", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class with respect to which we perform a one-vs-all fit. If None, then it is assumed that the given problem is binary." - }, - { - "name": "Cs", - "type": "Union[ArrayLike, int]", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "List of values for the regularization parameter or integer specifying the number of regularization parameters that should be used. In this case, the parameters will be chosen in a logarithmic scale between 1e-4 and 1e4." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to fit an intercept for the model. In this case the shape of the returned array is (n_cs, n_features + 1)." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for the solver." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion. For the newton-cg and lbfgs solvers, the iteration will stop when ``max{|g_i | i = 1, ..., n} <= tol`` where ``g_i`` is the i-th component of the gradient." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "For the liblinear and lbfgs solvers set verbose to any positive number for verbosity." 
- }, - { - "name": "solver", - "type": "Literal['lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Numerical solver to use." - }, - { - "name": "coef", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initialization value for coefficients of logistic regression. Useless for liblinear solver." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified." - }, - { - "name": "dual", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Dual or primal formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. Prefer dual=False when n_samples > n_features." - }, - { - "name": "penalty", - "type": "Literal['l1', 'l2', 'elasticnet']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "Used to specify the norm used in the penalization. The 'newton-cg', 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is only supported by the 'saga' solver." - }, - { - "name": "intercept_scaling", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Useful only when the solver 'liblinear' is used and self.fit_intercept is set to True. 
In this case, x becomes [x, self.intercept_scaling], i.e. a \"synthetic\" feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes ``intercept_scaling * synthetic_feature_weight``. Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased." - }, - { - "name": "multi_class", - "type": "Literal['ovr', 'multinomial', 'auto']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If the option chosen is 'ovr', then a binary problem is fit for each label. For 'multinomial' the loss minimised is the multinomial loss fit across the entire probability distribution, *even when the data is binary*. 'multinomial' is unavailable when solver='liblinear'. 'auto' selects 'ovr' if the data is binary, or if solver='liblinear', and otherwise selects 'multinomial'. .. versionadded:: 0.18 Stochastic Average Gradient descent solver for 'multinomial' case. .. versionchanged:: 0.22 Default changed from 'ovr' to 'auto' in 0.22." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the data. See :term:`Glossary ` for details." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, the input arrays X and y will not be checked." - }, - { - "name": "max_squared_sum", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum squared sum of X over samples. Used only in SAG solver. If None, it will be computed, going through all the samples. 
The value should be precomputed to speed up cross validation." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute a Logistic Regression model for a list of regularization\nparameters.\n\nThis is an implementation that uses the result of the previous model\nto speed up computations along the set of solutions, making it faster\nthan sequentially calling LogisticRegression for the different parameters.\nNote that there will be no speedup with liblinear solver, since it does\nnot handle warm-starting.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Input data, target values.\n\npos_class : int, default=None\n The class with respect to which we perform a one-vs-all fit.\n If None, then it is assumed that the given problem is binary.\n\nCs : int or array-like of shape (n_cs,), default=10\n List of values for the regularization parameter or integer specifying\n the number of regularization parameters that should be used. 
In this\n case, the parameters will be chosen in a logarithmic scale between\n 1e-4 and 1e4.\n\nfit_intercept : bool, default=True\n Whether to fit an intercept for the model. In this case the shape of\n the returned array is (n_cs, n_features + 1).\n\nmax_iter : int, default=100\n Maximum number of iterations for the solver.\n\ntol : float, default=1e-4\n Stopping criterion. For the newton-cg and lbfgs solvers, the iteration\n will stop when ``max{|g_i | i = 1, ..., n} <= tol``\n where ``g_i`` is the i-th component of the gradient.\n\nverbose : int, default=0\n For the liblinear and lbfgs solvers set verbose to any positive\n number for verbosity.\n\nsolver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'}, default='lbfgs'\n Numerical solver to use.\n\ncoef : array-like of shape (n_features,), default=None\n Initialization value for coefficients of logistic regression.\n Useless for liblinear solver.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\ndual : bool, default=False\n Dual or primal formulation. Dual formulation is only implemented for\n l2 penalty with liblinear solver. Prefer dual=False when\n n_samples > n_features.\n\npenalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n Used to specify the norm used in the penalization. The 'newton-cg',\n 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n only supported by the 'saga' solver.\n\nintercept_scaling : float, default=1.\n Useful only when the solver 'liblinear' is used\n and self.fit_intercept is set to True. 
In this case, x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equal to\n intercept_scaling is appended to the instance vector.\n The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\nmulti_class : {'ovr', 'multinomial', 'auto'}, default='auto'\n If the option chosen is 'ovr', then a binary problem is fit for each\n label. For 'multinomial' the loss minimised is the multinomial loss fit\n across the entire probability distribution, *even when the data is\n binary*. 'multinomial' is unavailable when solver='liblinear'.\n 'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n and otherwise selects 'multinomial'.\n\n .. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n .. versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22.\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n data. See :term:`Glossary ` for details.\n\ncheck_input : bool, default=True\n If False, the input arrays X and y will not be checked.\n\nmax_squared_sum : float, default=None\n Maximum squared sum of X over samples. Used only in SAG solver.\n If None, it will be computed, going through all the samples.\n The value should be precomputed to speed up cross validation.\n\nsample_weight : array-like of shape(n_samples,), default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nl1_ratio : float, default=None\n The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n used if ``penalty='elasticnet'``. 
Setting ``l1_ratio=0`` is equivalent\n to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n combination of L1 and L2.\n\nReturns\n-------\ncoefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)\n List of coefficients for the Logistic Regression model. If\n fit_intercept is set to True then the second dimension will be\n n_features + 1, where the last item represents the intercept. For\n ``multiclass='multinomial'``, the shape is (n_classes, n_cs,\n n_features) or (n_classes, n_cs, n_features + 1).\n\nCs : ndarray\n Grid of Cs used for cross-validation.\n\nn_iter : array of shape (n_cs,)\n Actual number of iteration for each Cs.\n\nNotes\n-----\nYou might get slightly different results with the solver liblinear than\nwith the others since this uses LIBLINEAR which penalizes the intercept.\n\n.. versionchanged:: 0.19\n The \"copy\" parameter was removed." - }, - { - "name": "_log_reg_scoring_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target labels." - }, - { - "name": "train", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The indices of the train set." - }, - { - "name": "test", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The indices of the test set." - }, - { - "name": "pos_class", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class with respect to which we perform a one-vs-all fit. 
If None, then it is assumed that the given problem is binary." - }, - { - "name": "Cs", - "type": "Union[List, int]", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Each of the values in Cs describes the inverse of regularization strength. If Cs is as an int, then a grid of Cs values are chosen in a logarithmic scale between 1e-4 and 1e4. If not provided, then a fixed set of values for Cs are used." - }, - { - "name": "scoring", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. For a list of scoring functions that can be used, look at :mod:`sklearn.metrics`. The default scoring option used is accuracy_score." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If False, then the bias term is set to zero. Else the last term of each coef_ gives us the intercept." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for the solver." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criteria." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. 
The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "For the liblinear and lbfgs solvers set verbose to any positive number for verbosity." - }, - { - "name": "solver", - "type": "Literal['lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Decides which solver to use." - }, - { - "name": "penalty", - "type": "Literal['l1', 'l2', 'elasticnet']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "Used to specify the norm used in the penalization. The 'newton-cg', 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is only supported by the 'saga' solver." - }, - { - "name": "dual", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Dual or primal formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. Prefer dual=False when n_samples > n_features." - }, - { - "name": "intercept_scaling", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Useful only when the solver 'liblinear' is used and self.fit_intercept is set to True. In this case, x becomes [x, self.intercept_scaling], i.e. a \"synthetic\" feature with constant value equals to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic feature weight Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. 
To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased." - }, - { - "name": "multi_class", - "type": "Literal['auto', 'ovr', 'multinomial']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If the option chosen is 'ovr', then a binary problem is fit for each label. For 'multinomial' the loss minimised is the multinomial loss fit across the entire probability distribution, *even when the data is binary*. 'multinomial' is unavailable when solver='liblinear'." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the data. See :term:`Glossary ` for details." - }, - { - "name": "max_squared_sum", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum squared sum of X over samples. Used only in SAG solver. If None, it will be computed, going through all the samples. The value should be precomputed to speed up cross validation." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes scores across logistic_regression_path\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target labels.\n\ntrain : list of indices\n The indices of the train set.\n\ntest : list of indices\n The indices of the test set.\n\npos_class : int, default=None\n The class with respect to which we perform a one-vs-all fit.\n If None, then it is assumed that the given problem is binary.\n\nCs : int or list of floats, default=10\n Each of the values in Cs describes the inverse of\n regularization strength. If Cs is as an int, then a grid of Cs\n values are chosen in a logarithmic scale between 1e-4 and 1e4.\n If not provided, then a fixed set of values for Cs are used.\n\nscoring : callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``. For a list of scoring functions\n that can be used, look at :mod:`sklearn.metrics`. The\n default scoring option used is accuracy_score.\n\nfit_intercept : bool, default=False\n If False, then the bias term is set to zero. 
Else the last\n term of each coef_ gives us the intercept.\n\nmax_iter : int, default=100\n Maximum number of iterations for the solver.\n\ntol : float, default=1e-4\n Tolerance for stopping criteria.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\nverbose : int, default=0\n For the liblinear and lbfgs solvers set verbose to any positive\n number for verbosity.\n\nsolver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'}, default='lbfgs'\n Decides which solver to use.\n\npenalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n Used to specify the norm used in the penalization. The 'newton-cg',\n 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n only supported by the 'saga' solver.\n\ndual : bool, default=False\n Dual or primal formulation. Dual formulation is only implemented for\n l2 penalty with liblinear solver. Prefer dual=False when\n n_samples > n_features.\n\nintercept_scaling : float, default=1.\n Useful only when the solver 'liblinear' is used\n and self.fit_intercept is set to True. In this case, x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equals to\n intercept_scaling is appended to the instance vector.\n The intercept becomes intercept_scaling * synthetic feature weight\n Note! 
the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\nmulti_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n If the option chosen is 'ovr', then a binary problem is fit for each\n label. For 'multinomial' the loss minimised is the multinomial loss fit\n across the entire probability distribution, *even when the data is\n binary*. 'multinomial' is unavailable when solver='liblinear'.\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n data. See :term:`Glossary ` for details.\n\nmax_squared_sum : float, default=None\n Maximum squared sum of X over samples. Used only in SAG solver.\n If None, it will be computed, going through all the samples.\n The value should be precomputed to speed up cross validation.\n\nsample_weight : array-like of shape(n_samples,), default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nl1_ratio : float, default=None\n The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n combination of L1 and L2.\n\nReturns\n-------\ncoefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)\n List of coefficients for the Logistic Regression model. If\n fit_intercept is set to True then the second dimension will be\n n_features + 1, where the last item represents the intercept.\n\nCs : ndarray\n Grid of Cs used for cross-validation.\n\nscores : ndarray of shape (n_cs,)\n Scores obtained for each Cs.\n\nn_iter : ndarray of shape(n_cs,)\n Actual number of iteration for each Cs." 
- } - ] - }, - { - "name": "sklearn.linear_model._omp", - "imports": [ - "import warnings", - "from math import sqrt", - "import numpy as np", - "from scipy import linalg", - "from scipy.linalg.lapack import get_lapack_funcs", - "from joblib import Parallel", - "from _base import LinearModel", - "from _base import _pre_fit", - "from base import RegressorMixin", - "from base import MultiOutputMixin", - "from utils import as_float_array", - "from utils import check_array", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from model_selection import check_cv" - ], - "classes": [ - { - "name": "OrthogonalMatchingPursuit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_nonzero_coefs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Desired number of non-zero entries in the solution. If None (by default) this value is set to 10% of n_features." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum norm of the residual. If not None, overrides n_nonzero_coefs." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. 
If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram and Xy matrix to speed up calculations. Improves performance when :term:`n_targets` or :term:`n_samples` is very large. Note that if you already have such matrices, you can pass them directly to the fit method." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. Will be cast to X's dtype if necessary" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to X's dtype if necessary\n\n\nReturns\n-------\nself : object\n returns an instance of self." - } - ], - "docstring": "Orthogonal Matching Pursuit model (OMP).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_nonzero_coefs : int, default=None\n Desired number of non-zero entries in the solution. If None (by\n default) this value is set to 10% of n_features.\n\ntol : float, default=None\n Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. 
If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : 'auto' or bool, default='auto'\n Whether to use a precomputed Gram and Xy matrix to speed up\n calculations. Improves performance when :term:`n_targets` or\n :term:`n_samples` is very large. Note that if you already have such\n matrices, you can pass them directly to the fit method.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the formula).\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : int or array-like\n Number of active features across every target.\n\nn_nonzero_coefs_ : int\n The number of non-zero coefficients in the solution. If\n `n_nonzero_coefs` is None and `tol` is None this value is either set\n to 10% of `n_features` or 1, whichever is greater.\n\nExamples\n--------\n>>> from sklearn.linear_model import OrthogonalMatchingPursuit\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(noise=4, random_state=0)\n>>> reg = OrthogonalMatchingPursuit().fit(X, y)\n>>> reg.score(X, y)\n0.9991...\n>>> reg.predict(X[:1,])\narray([-78.3854...])\n\nNotes\n-----\nOrthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,\nMatching pursuits with time-frequency dictionaries, IEEE Transactions on\nSignal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.\n(http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)\n\nThis implementation is based on Rubinstein, R., Zibulevsky, M. 
and Elad,\nM., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\nMatching Pursuit Technical Report - CS Technion, April 2008.\nhttps://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf\n\nSee Also\n--------\northogonal_mp\northogonal_mp_gram\nlars_path\nLars\nLassoLars\nsklearn.decomposition.sparse_encode\nOrthogonalMatchingPursuitCV" - }, - { - "name": "OrthogonalMatchingPursuitCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the design matrix X must be copied by the algorithm. A false value is only helpful if X is already Fortran-ordered, otherwise a copy is made anyway." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum numbers of iterations to perform, therefore maximum features to include. 10% of ``n_features`` but at least 5 if available." 
- }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPUs to use during the cross validation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Sets the verbosity amount." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. Will be cast to X's dtype if necessary." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values. 
Will be cast to X's dtype if necessary.\n\nReturns\n-------\nself : object\n returns an instance of self." - } - ], - "docstring": "Cross-validated Orthogonal Matching Pursuit model (OMP).\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncopy : bool, default=True\n Whether the design matrix X must be copied by the algorithm. A false\n value is only helpful if X is already Fortran-ordered, otherwise a\n copy is made anyway.\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nmax_iter : int, default=None\n Maximum numbers of iterations to perform, therefore maximum features\n to include. 10% of ``n_features`` but at least 5 if available.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. 
versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nAttributes\n----------\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the problem formulation).\n\nn_nonzero_coefs_ : int\n Estimated number of non-zero coefficients giving the best mean squared\n error over the cross-validation folds.\n\nn_iter_ : int or array-like\n Number of active features across every target for the model refit with\n the best hyperparameters got by cross-validating across all folds.\n\nExamples\n--------\n>>> from sklearn.linear_model import OrthogonalMatchingPursuitCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=100, n_informative=10,\n... noise=4, random_state=0)\n>>> reg = OrthogonalMatchingPursuitCV(cv=5).fit(X, y)\n>>> reg.score(X, y)\n0.9991...\n>>> reg.n_nonzero_coefs_\n10\n>>> reg.predict(X[:1,])\narray([-78.3854...])\n\nSee Also\n--------\northogonal_mp\northogonal_mp_gram\nlars_path\nLars\nLassoLars\nOrthogonalMatchingPursuit\nLarsCV\nLassoLarsCV\nsklearn.decomposition.sparse_encode" - } - ], - "functions": [ - { - "name": "_cholesky_omp", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input dictionary. Columns are assumed to have unit norm." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input targets." 
- }, - { - "name": "n_nonzero_coefs", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targeted number of non-zero elements." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targeted squared error, if not None overrides n_nonzero_coefs." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the design matrix X must be copied by the algorithm. A false value is only helpful if X is already Fortran-ordered, otherwise a copy is made anyway." - }, - { - "name": "return_path", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return every value of the nonzero coefficients along the forward path. Useful for cross-validation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Orthogonal Matching Pursuit step using the Cholesky decomposition.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Input dictionary. Columns are assumed to have unit norm.\n\ny : ndarray of shape (n_samples,)\n Input targets.\n\nn_nonzero_coefs : int\n Targeted number of non-zero elements.\n\ntol : float, default=None\n Targeted squared error, if not None overrides n_nonzero_coefs.\n\ncopy_X : bool, default=True\n Whether the design matrix X must be copied by the algorithm. A false\n value is only helpful if X is already Fortran-ordered, otherwise a\n copy is made anyway.\n\nreturn_path : bool, default=False\n Whether to return every value of the nonzero coefficients along the\n forward path. 
Useful for cross-validation.\n\nReturns\n-------\ngamma : ndarray of shape (n_nonzero_coefs,)\n Non-zero elements of the solution.\n\nidx : ndarray of shape (n_nonzero_coefs,)\n Indices of the positions of the elements in gamma within the solution\n vector.\n\ncoef : ndarray of shape (n_features, n_nonzero_coefs)\n The first k values of column k correspond to the coefficient value\n for the active features at that step. The lower left triangle contains\n garbage. Only returned if ``return_path=True``.\n\nn_active : int\n Number of active features at convergence." - }, - { - "name": "_gram_omp", - "decorators": [], - "parameters": [ - { - "name": "Gram", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gram matrix of the input data matrix." - }, - { - "name": "Xy", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input targets." - }, - { - "name": "n_nonzero_coefs", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targeted number of non-zero elements." - }, - { - "name": "tol_0", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Squared norm of y, required if tol is not None." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targeted squared error, if not None overrides n_nonzero_coefs." - }, - { - "name": "copy_Gram", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the gram matrix must be copied by the algorithm. A false value is only helpful if it is already Fortran-ordered, otherwise a copy is made anyway." 
- }, - { - "name": "copy_Xy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the covariance vector Xy must be copied by the algorithm. If False, it may be overwritten." - }, - { - "name": "return_path", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return every value of the nonzero coefficients along the forward path. Useful for cross-validation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Orthogonal Matching Pursuit step on a precomputed Gram matrix.\n\nThis function uses the Cholesky decomposition method.\n\nParameters\n----------\nGram : ndarray of shape (n_features, n_features)\n Gram matrix of the input data matrix.\n\nXy : ndarray of shape (n_features,)\n Input targets.\n\nn_nonzero_coefs : int\n Targeted number of non-zero elements.\n\ntol_0 : float, default=None\n Squared norm of y, required if tol is not None.\n\ntol : float, default=None\n Targeted squared error, if not None overrides n_nonzero_coefs.\n\ncopy_Gram : bool, default=True\n Whether the gram matrix must be copied by the algorithm. A false\n value is only helpful if it is already Fortran-ordered, otherwise a\n copy is made anyway.\n\ncopy_Xy : bool, default=True\n Whether the covariance vector Xy must be copied by the algorithm.\n If False, it may be overwritten.\n\nreturn_path : bool, default=False\n Whether to return every value of the nonzero coefficients along the\n forward path. Useful for cross-validation.\n\nReturns\n-------\ngamma : ndarray of shape (n_nonzero_coefs,)\n Non-zero elements of the solution.\n\nidx : ndarray of shape (n_nonzero_coefs,)\n Indices of the positions of the elements in gamma within the solution\n vector.\n\ncoefs : ndarray of shape (n_features, n_nonzero_coefs)\n The first k values of column k correspond to the coefficient value\n for the active features at that step. 
The lower left triangle contains\n garbage. Only returned if ``return_path=True``.\n\nn_active : int\n Number of active features at convergence." - }, - { - "name": "orthogonal_mp", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. Columns are assumed to have unit norm." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input targets." - }, - { - "name": "n_nonzero_coefs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Desired number of non-zero entries in the solution. If None (by default) this value is set to 10% of n_features." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum norm of the residual. If not None, overrides n_nonzero_coefs." - }, - { - "name": "precompute", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to perform precomputations. Improves performance when n_targets or n_samples is very large." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the design matrix X must be copied by the algorithm. A false value is only helpful if X is already Fortran-ordered, otherwise a copy is made anyway." - }, - { - "name": "return_path", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return every value of the nonzero coefficients along the forward path. Useful for cross-validation." 
- }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Orthogonal Matching Pursuit (OMP).\n\nSolves n_targets Orthogonal Matching Pursuit problems.\nAn instance of the problem has the form:\n\nWhen parametrized by the number of non-zero coefficients using\n`n_nonzero_coefs`:\nargmin ||y - X\\gamma||^2 subject to ||\\gamma||_0 <= n_{nonzero coefs}\n\nWhen parametrized by error using the parameter `tol`:\nargmin ||\\gamma||_0 subject to ||y - X\\gamma||^2 <= tol\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Input data. Columns are assumed to have unit norm.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Input targets.\n\nn_nonzero_coefs : int, default=None\n Desired number of non-zero entries in the solution. If None (by\n default) this value is set to 10% of n_features.\n\ntol : float, default=None\n Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\nprecompute : 'auto' or bool, default=False\n Whether to perform precomputations. Improves performance when n_targets\n or n_samples is very large.\n\ncopy_X : bool, default=True\n Whether the design matrix X must be copied by the algorithm. A false\n value is only helpful if X is already Fortran-ordered, otherwise a\n copy is made anyway.\n\nreturn_path : bool, default=False\n Whether to return every value of the nonzero coefficients along the\n forward path. Useful for cross-validation.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\ncoef : ndarray of shape (n_features,) or (n_features, n_targets)\n Coefficients of the OMP solution. If `return_path=True`, this contains\n the whole coefficient path. 
In this case its shape is\n (n_features, n_features) or (n_features, n_targets, n_features) and\n iterating over the last axis yields coefficients in increasing order\n of active features.\n\nn_iters : array-like or int\n Number of active features across every target. Returned only if\n `return_n_iter` is set to True.\n\nSee Also\n--------\nOrthogonalMatchingPursuit\northogonal_mp_gram\nlars_path\nsklearn.decomposition.sparse_encode\n\nNotes\n-----\nOrthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,\nMatching pursuits with time-frequency dictionaries, IEEE Transactions on\nSignal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.\n(http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)\n\nThis implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,\nM., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\nMatching Pursuit Technical Report - CS Technion, April 2008.\nhttps://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf" - }, - { - "name": "orthogonal_mp_gram", - "decorators": [], - "parameters": [ - { - "name": "Gram", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gram matrix of the input data: X.T * X." - }, - { - "name": "Xy", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input targets multiplied by X: X.T * y." - }, - { - "name": "n_nonzero_coefs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Desired number of non-zero entries in the solution. If None (by default) this value is set to 10% of n_features." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum norm of the residual. If not None, overrides n_nonzero_coefs." 
- }, - { - "name": "norms_squared", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Squared L2 norms of the lines of y. Required if tol is not None." - }, - { - "name": "copy_Gram", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the gram matrix must be copied by the algorithm. A false value is only helpful if it is already Fortran-ordered, otherwise a copy is made anyway." - }, - { - "name": "copy_Xy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the covariance vector Xy must be copied by the algorithm. If False, it may be overwritten." - }, - { - "name": "return_path", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return every value of the nonzero coefficients along the forward path. Useful for cross-validation." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Gram Orthogonal Matching Pursuit (OMP).\n\nSolves n_targets Orthogonal Matching Pursuit problems using only\nthe Gram matrix X.T * X and the product X.T * y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nGram : ndarray of shape (n_features, n_features)\n Gram matrix of the input data: X.T * X.\n\nXy : ndarray of shape (n_features,) or (n_features, n_targets)\n Input targets multiplied by X: X.T * y.\n\nn_nonzero_coefs : int, default=None\n Desired number of non-zero entries in the solution. If None (by\n default) this value is set to 10% of n_features.\n\ntol : float, default=None\n Maximum norm of the residual. 
If not None, overrides n_nonzero_coefs.\n\nnorms_squared : array-like of shape (n_targets,), default=None\n Squared L2 norms of the lines of y. Required if tol is not None.\n\ncopy_Gram : bool, default=True\n Whether the gram matrix must be copied by the algorithm. A false\n value is only helpful if it is already Fortran-ordered, otherwise a\n copy is made anyway.\n\ncopy_Xy : bool, default=True\n Whether the covariance vector Xy must be copied by the algorithm.\n If False, it may be overwritten.\n\nreturn_path : bool, default=False\n Whether to return every value of the nonzero coefficients along the\n forward path. Useful for cross-validation.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\ncoef : ndarray of shape (n_features,) or (n_features, n_targets)\n Coefficients of the OMP solution. If `return_path=True`, this contains\n the whole coefficient path. In this case its shape is\n (n_features, n_features) or (n_features, n_targets, n_features) and\n iterating over the last axis yields coefficients in increasing order\n of active features.\n\nn_iters : array-like or int\n Number of active features across every target. Returned only if\n `return_n_iter` is set to True.\n\nSee Also\n--------\nOrthogonalMatchingPursuit\northogonal_mp\nlars_path\nsklearn.decomposition.sparse_encode\n\nNotes\n-----\nOrthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,\nMatching pursuits with time-frequency dictionaries, IEEE Transactions on\nSignal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.\n(http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)\n\nThis implementation is based on Rubinstein, R., Zibulevsky, M. 
and Elad,\nM., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\nMatching Pursuit Technical Report - CS Technion, April 2008.\nhttps://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf" - }, - { - "name": "_omp_path_residues", - "decorators": [], - "parameters": [ - { - "name": "X_train", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to fit the LARS on." - }, - { - "name": "y_train", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to fit LARS on." - }, - { - "name": "X_test", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to compute the residues on." - }, - { - "name": "y_test", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to compute the residues on." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether X_train, X_test, y_train and y_test should be copied. If False, they may be overwritten." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. 
If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum numbers of iterations to perform, therefore maximum features to include. 100 by default." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the residues on left-out data for a full LARS path.\n\nParameters\n----------\nX_train : ndarray of shape (n_samples, n_features)\n The data to fit the LARS on.\n\ny_train : ndarray of shape (n_samples)\n The target variable to fit LARS on.\n\nX_test : ndarray of shape (n_samples, n_features)\n The data to compute the residues on.\n\ny_test : ndarray of shape (n_samples)\n The target variable to compute the residues on.\n\ncopy : bool, default=True\n Whether X_train, X_test, y_train and y_test should be copied. If\n False, they may be overwritten.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nmax_iter : int, default=100\n Maximum numbers of iterations to perform, therefore maximum features\n to include. 100 by default.\n\nReturns\n-------\nresidues : ndarray of shape (n_samples, max_features)\n Residues of the prediction on the test data." 
- } - ] - }, - { - "name": "sklearn.linear_model._passive_aggressive", - "imports": [ - "from utils.validation import _deprecate_positional_args", - "from _stochastic_gradient import BaseSGDClassifier", - "from _stochastic_gradient import BaseSGDRegressor", - "from _stochastic_gradient import DEFAULT_EPSILON" - ], - "classes": [ - { - "name": "PassiveAggressiveClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "C", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Maximum step size (regularization). Defaults to 1.0." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the intercept should be estimated or not. If False, the data is assumed to be already centered." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the :meth:`partial_fit` method. .. versionadded:: 0.19" - }, - { - "name": "tol", - "type": "Optional[float]", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "The stopping criterion. If it is not None, the iterations will stop when (loss > previous_loss - tol). .. versionadded:: 0.19" - }, - { - "name": "early_stopping", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use early stopping to terminate training when validation. score is not improving. 
If set to True, it will automatically set aside a stratified fraction of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs. .. versionadded:: 0.20" - }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True. .. versionadded:: 0.20" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations with no improvement to wait before early stopping. .. versionadded:: 0.20" - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not the training data should be shuffled after each epoch." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level" - }, - { - "name": "loss", - "type": "str", - "hasDefault": true, - "default": "\"hinge\"", - "limitation": null, - "ignored": false, - "docstring": "The loss function to be used: hinge: equivalent to PA-I in the reference paper. squared_hinge: equivalent to PA-II in the reference paper." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of CPUs to use to do the OVA (One Versus All, for multi-class problems) computation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to shuffle the training data, when ``shuffle`` is set to ``True``. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `. Repeatedly calling fit or partial_fit when warm_start is True can result in a different solution than when calling fit a single time because of the way the data is shuffled." - }, - { - "name": "class_weight", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Preset for the class_weight fit parameter. Weights associated with classes. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` .. versionadded:: 0.17 parameter *class_weight* to automatically weight samples." - }, - { - "name": "average", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, computes the averaged SGD weights and stores the result in the ``coef_`` attribute. If set to an int greater than 1, averaging will begin once the total number of samples seen reaches average. So average=10 will begin averaging after seeing 10 samples. .. 
versionadded:: 0.19 parameter *average* to use weights averaging in SGD" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Subset of the training data" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Subset of the target values" - }, - { - "name": "classes", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Classes across all calls to partial_fit. Can be obtained by via `np.unique(y_all)`, where y_all is the target vector of the entire dataset. This argument is required for the first call to partial_fit and can be omitted in the subsequent calls. Note that y doesn't need to contain all labels in `classes`." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit linear model with Passive Aggressive algorithm.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Subset of the training data\n\ny : numpy array of shape [n_samples]\n Subset of the target values\n\nclasses : array, shape = [n_classes]\n Classes across all calls to partial_fit.\n Can be obtained by via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\nReturns\n-------\nself : returns an instance of self." 
- }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "coef_init", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial coefficients to warm-start the optimization." - }, - { - "name": "intercept_init", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial intercept to warm-start the optimization." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit linear model with Passive Aggressive algorithm.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data\n\ny : numpy array of shape [n_samples]\n Target values\n\ncoef_init : array, shape = [n_classes,n_features]\n The initial coefficients to warm-start the optimization.\n\nintercept_init : array, shape = [n_classes]\n The initial intercept to warm-start the optimization.\n\nReturns\n-------\nself : returns an instance of self." - } - ], - "docstring": "Passive Aggressive Classifier\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nC : float, default=1.0\n Maximum step size (regularization). Defaults to 1.0.\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If False, the\n data is assumed to be already centered.\n\nmax_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\ntol : float or None, default=1e-3\n The stopping criterion. 
If it is not None, the iterations will stop\n when (loss > previous_loss - tol).\n\n .. versionadded:: 0.19\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation.\n score is not improving. If set to True, it will automatically set aside\n a stratified fraction of training data as validation and terminate\n training when validation score is not improving by at least tol for\n n_iter_no_change consecutive epochs.\n\n .. versionadded:: 0.20\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True.\n\n .. versionadded:: 0.20\n\nn_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before early stopping.\n\n .. versionadded:: 0.20\n\nshuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\nverbose : integer, default=0\n The verbosity level\n\nloss : string, default=\"hinge\"\n The loss function to be used:\n hinge: equivalent to PA-I in the reference paper.\n squared_hinge: equivalent to PA-II in the reference paper.\n\nn_jobs : int or None, default=None\n The number of CPUs to use to do the OVA (One Versus All, for\n multi-class problems) computation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n Used to shuffle the training data, when ``shuffle`` is set to\n ``True``. 
Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\n Repeatedly calling fit or partial_fit when warm_start is True can\n result in a different solution than when calling fit a single time\n because of the way the data is shuffled.\n\nclass_weight : dict, {class_label: weight} or \"balanced\" or None, default=None\n Preset for the class_weight fit parameter.\n\n Weights associated with classes. If not given, all classes\n are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n .. versionadded:: 0.17\n parameter *class_weight* to automatically weight samples.\n\naverage : bool or int, default=False\n When set to True, computes the averaged SGD weights and stores the\n result in the ``coef_`` attribute. If set to an int greater than 1,\n averaging will begin once the total number of samples seen reaches\n average. So average=10 will begin averaging after seeing 10 samples.\n\n .. 
versionadded:: 0.19\n parameter *average* to use weights averaging in SGD\n\nAttributes\n----------\ncoef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes, n_features]\n Weights assigned to the features.\n\nintercept_ : array, shape = [1] if n_classes == 2 else [n_classes]\n Constants in decision function.\n\nn_iter_ : int\n The actual number of iterations to reach the stopping criterion.\n For multiclass fits, it is the maximum over every binary fit.\n\nclasses_ : array of shape (n_classes,)\n The unique classes labels.\n\nt_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\nloss_function_ : callable\n Loss function used by the algorithm.\n\nExamples\n--------\n>>> from sklearn.linear_model import PassiveAggressiveClassifier\n>>> from sklearn.datasets import make_classification\n\n>>> X, y = make_classification(n_features=4, random_state=0)\n>>> clf = PassiveAggressiveClassifier(max_iter=1000, random_state=0,\n... tol=1e-3)\n>>> clf.fit(X, y)\nPassiveAggressiveClassifier(random_state=0)\n>>> print(clf.coef_)\n[[0.26642044 0.45070924 0.67251877 0.64185414]]\n>>> print(clf.intercept_)\n[1.84127814]\n>>> print(clf.predict([[0, 0, 0, 0]]))\n[1]\n\nSee Also\n--------\nSGDClassifier\nPerceptron\n\nReferences\n----------\nOnline Passive-Aggressive Algorithms\n\nK. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR (2006)" - }, - { - "name": "PassiveAggressiveRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "C", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Maximum step size (regularization). Defaults to 1.0." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the intercept should be estimated or not. 
If False, the data is assumed to be already centered. Defaults to True." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the :meth:`partial_fit` method. .. versionadded:: 0.19" - }, - { - "name": "tol", - "type": "Optional[float]", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "The stopping criterion. If it is not None, the iterations will stop when (loss > previous_loss - tol). .. versionadded:: 0.19" - }, - { - "name": "early_stopping", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use early stopping to terminate training when validation. score is not improving. If set to True, it will automatically set aside a fraction of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs. .. versionadded:: 0.20" - }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True. .. versionadded:: 0.20" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations with no improvement to wait before early stopping. .. versionadded:: 0.20" - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not the training data should be shuffled after each epoch." 
- }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level" - }, - { - "name": "loss", - "type": "str", - "hasDefault": true, - "default": "\"epsilon_insensitive\"", - "limitation": null, - "ignored": false, - "docstring": "The loss function to be used: epsilon_insensitive: equivalent to PA-I in the reference paper. squared_epsilon_insensitive: equivalent to PA-II in the reference paper." - }, - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "DEFAULT_EPSILON", - "limitation": null, - "ignored": false, - "docstring": "If the difference between the current prediction and the correct label is below this threshold, the model is not updated." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to shuffle the training data, when ``shuffle`` is set to ``True``. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `. Repeatedly calling fit or partial_fit when warm_start is True can result in a different solution than when calling fit a single time because of the way the data is shuffled." - }, - { - "name": "average", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, computes the averaged SGD weights and stores the result in the ``coef_`` attribute. If set to an int greater than 1, averaging will begin once the total number of samples seen reaches average. 
So average=10 will begin averaging after seeing 10 samples. .. versionadded:: 0.19 parameter *average* to use weights averaging in SGD" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Subset of training data" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Subset of target values" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit linear model with Passive Aggressive algorithm.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Subset of training data\n\ny : numpy array of shape [n_samples]\n Subset of target values\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "coef_init", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial coefficients to warm-start the optimization." - }, - { - "name": "intercept_init", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial intercept to warm-start the optimization." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit linear model with Passive Aggressive algorithm.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data\n\ny : numpy array of shape [n_samples]\n Target values\n\ncoef_init : array, shape = [n_features]\n The initial coefficients to warm-start the optimization.\n\nintercept_init : array, shape = [1]\n The initial intercept to warm-start the optimization.\n\nReturns\n-------\nself : returns an instance of self." - } - ], - "docstring": "Passive Aggressive Regressor\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nC : float, default=1.0\n Maximum step size (regularization). Defaults to 1.0.\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If False, the\n data is assumed to be already centered. Defaults to True.\n\nmax_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\ntol : float or None, default=1e-3\n The stopping criterion. If it is not None, the iterations will stop\n when (loss > previous_loss - tol).\n\n .. versionadded:: 0.19\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation.\n score is not improving. If set to True, it will automatically set aside\n a fraction of training data as validation and terminate\n training when validation score is not improving by at least tol for\n n_iter_no_change consecutive epochs.\n\n .. versionadded:: 0.20\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True.\n\n .. 
versionadded:: 0.20\n\nn_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before early stopping.\n\n .. versionadded:: 0.20\n\nshuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\nverbose : integer, default=0\n The verbosity level\n\nloss : string, default=\"epsilon_insensitive\"\n The loss function to be used:\n epsilon_insensitive: equivalent to PA-I in the reference paper.\n squared_epsilon_insensitive: equivalent to PA-II in the reference\n paper.\n\nepsilon : float, default=DEFAULT_EPSILON\n If the difference between the current prediction and the correct label\n is below this threshold, the model is not updated.\n\nrandom_state : int, RandomState instance, default=None\n Used to shuffle the training data, when ``shuffle`` is set to\n ``True``. Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\n Repeatedly calling fit or partial_fit when warm_start is True can\n result in a different solution than when calling fit a single time\n because of the way the data is shuffled.\n\naverage : bool or int, default=False\n When set to True, computes the averaged SGD weights and stores the\n result in the ``coef_`` attribute. If set to an int greater than 1,\n averaging will begin once the total number of samples seen reaches\n average. So average=10 will begin averaging after seeing 10 samples.\n\n .. 
versionadded:: 0.19\n parameter *average* to use weights averaging in SGD\n\nAttributes\n----------\ncoef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes, n_features]\n Weights assigned to the features.\n\nintercept_ : array, shape = [1] if n_classes == 2 else [n_classes]\n Constants in decision function.\n\nn_iter_ : int\n The actual number of iterations to reach the stopping criterion.\n\nt_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\nExamples\n--------\n>>> from sklearn.linear_model import PassiveAggressiveRegressor\n>>> from sklearn.datasets import make_regression\n\n>>> X, y = make_regression(n_features=4, random_state=0)\n>>> regr = PassiveAggressiveRegressor(max_iter=100, random_state=0,\n... tol=1e-3)\n>>> regr.fit(X, y)\nPassiveAggressiveRegressor(max_iter=100, random_state=0)\n>>> print(regr.coef_)\n[20.48736655 34.18818427 67.59122734 87.94731329]\n>>> print(regr.intercept_)\n[-0.02306214]\n>>> print(regr.predict([[0, 0, 0, 0]]))\n[-0.02306214]\n\nSee Also\n--------\nSGDRegressor\n\nReferences\n----------\nOnline Passive-Aggressive Algorithms\n\nK. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR (2006)" - } - ], - "functions": [] - }, - { - "name": "sklearn.linear_model._perceptron", - "imports": [ - "from utils.validation import _deprecate_positional_args", - "from _stochastic_gradient import BaseSGDClassifier" - ], - "classes": [ - { - "name": "Perceptron", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "penalty", - "type": "Literal['l2', 'l1', 'elasticnet']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The penalty (aka regularization term) to be used." 
- }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the regularization term if regularization is used." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The Elastic Net mixing parameter, with `0 <= l1_ratio <= 1`. `l1_ratio=0` corresponds to L2 penalty, `l1_ratio=1` to L1. Only used if `penalty='elasticnet'`. .. versionadded:: 0.24" - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the intercept should be estimated or not. If False, the data is assumed to be already centered." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the :meth:`partial_fit` method. .. versionadded:: 0.19" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "The stopping criterion. If it is not None, the iterations will stop when (loss > previous_loss - tol). .. versionadded:: 0.19" - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not the training data should be shuffled after each epoch." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level" - }, - { - "name": "eta0", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant by which the updates are multiplied." 
- }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of CPUs to use to do the OVA (One Versus All, for multi-class problems) computation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to shuffle the training data, when ``shuffle`` is set to ``True``. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "early_stopping", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use early stopping to terminate training when validation. score is not improving. If set to True, it will automatically set aside a stratified fraction of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs. .. versionadded:: 0.20" - }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True. .. versionadded:: 0.20" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations with no improvement to wait before early stopping. .. versionadded:: 0.20" - }, - { - "name": "class_weight", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Preset for the class_weight fit parameter. 
Weights associated with classes. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``" - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Perceptron\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\npenalty : {'l2','l1','elasticnet'}, default=None\n The penalty (aka regularization term) to be used.\n\nalpha : float, default=0.0001\n Constant that multiplies the regularization term if regularization is\n used.\n\nl1_ratio : float, default=0.15\n The Elastic Net mixing parameter, with `0 <= l1_ratio <= 1`.\n `l1_ratio=0` corresponds to L2 penalty, `l1_ratio=1` to L1.\n Only used if `penalty='elasticnet'`.\n\n .. versionadded:: 0.24\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If False, the\n data is assumed to be already centered.\n\nmax_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\ntol : float, default=1e-3\n The stopping criterion. If it is not None, the iterations will stop\n when (loss > previous_loss - tol).\n\n .. 
versionadded:: 0.19\n\nshuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n The verbosity level\n\neta0 : double, default=1\n Constant by which the updates are multiplied.\n\nn_jobs : int, default=None\n The number of CPUs to use to do the OVA (One Versus All, for\n multi-class problems) computation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n Used to shuffle the training data, when ``shuffle`` is set to\n ``True``. Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation.\n score is not improving. If set to True, it will automatically set aside\n a stratified fraction of training data as validation and terminate\n training when validation score is not improving by at least tol for\n n_iter_no_change consecutive epochs.\n\n .. versionadded:: 0.20\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True.\n\n .. versionadded:: 0.20\n\nn_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before early stopping.\n\n .. versionadded:: 0.20\n\nclass_weight : dict, {class_label: weight} or \"balanced\", default=None\n Preset for the class_weight fit parameter.\n\n Weights associated with classes. 
If not given, all classes\n are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution. See\n :term:`the Glossary `.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n The unique classes labels.\n\ncoef_ : ndarray of shape (1, n_features) if n_classes == 2 else (n_classes, n_features)\n Weights assigned to the features.\n\nintercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n Constants in decision function.\n\nloss_function_ : concrete\u00a0LossFunction\n The function that determines the loss, or difference between the\n output of the algorithm and the target values.\n\nn_iter_ : int\n The actual number of iterations to reach the stopping criterion.\n For multiclass fits, it is the maximum over every binary fit.\n\nt_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\nNotes\n-----\n\n``Perceptron`` is a classification algorithm which shares the same\nunderlying implementation with ``SGDClassifier``. In fact,\n``Perceptron()`` is equivalent to `SGDClassifier(loss=\"perceptron\",\neta0=1, learning_rate=\"constant\", penalty=None)`.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.linear_model import Perceptron\n>>> X, y = load_digits(return_X_y=True)\n>>> clf = Perceptron(tol=1e-3, random_state=0)\n>>> clf.fit(X, y)\nPerceptron()\n>>> clf.score(X, y)\n0.939...\n\nSee Also\n--------\nSGDClassifier\n\nReferences\n----------\n\nhttps://en.wikipedia.org/wiki/Perceptron and references therein." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.linear_model._ransac", - "imports": [ - "import numpy as np", - "import warnings", - "from base import BaseEstimator", - "from base import MetaEstimatorMixin", - "from base import RegressorMixin", - "from base import clone", - "from base import MultiOutputMixin", - "from utils import check_random_state", - "from utils import check_consistent_length", - "from utils.random import sample_without_replacement", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from _base import LinearRegression", - "from utils.validation import has_fit_parameter", - "from exceptions import ConvergenceWarning" - ], - "classes": [ - { - "name": "RANSACRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Base estimator object which implements the following methods: * `fit(X, y)`: Fit model to given training data and target values. * `score(X, y)`: Returns the mean accuracy on the given test data, which is used for the stop criterion defined by `stop_score`. Additionally, the score is used to decide which of two equally large consensus sets is chosen as the better one. * `predict(X)`: Returns predicted values using the linear model, which is used to compute residual error using loss function. If `base_estimator` is None, then :class:`~sklearn.linear_model.LinearRegression` is used for target values of dtype float. Note that the current implementation only supports regression estimators." 
- }, - { - "name": "min_samples", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Minimum number of samples chosen randomly from original data. Treated as an absolute number of samples for `min_samples >= 1`, treated as a relative number `ceil(min_samples * X.shape[0]`) for `min_samples < 1`. This is typically chosen as the minimal number of samples necessary to estimate the given `base_estimator`. By default a ``sklearn.linear_model.LinearRegression()`` estimator is assumed and `min_samples` is chosen as ``X.shape[1] + 1``." - }, - { - "name": "residual_threshold", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum residual for a data sample to be classified as an inlier. By default the threshold is chosen as the MAD (median absolute deviation) of the target values `y`." - }, - { - "name": "is_data_valid", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This function is called with the randomly selected data before the model is fitted to it: `is_data_valid(X, y)`. If its return value is False the current randomly chosen sub-sample is skipped." - }, - { - "name": "is_model_valid", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This function is called with the estimated model and the randomly selected data: `is_model_valid(model, X, y)`. If its return value is False the current randomly chosen sub-sample is skipped. Rejecting samples with this function is computationally costlier than with `is_data_valid`. `is_model_valid` should therefore only be used if the estimated model is needed for making the rejection decision." 
- }, - { - "name": "max_trials", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for random sample selection." - }, - { - "name": "max_skips", - "type": "int", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations that can be skipped due to finding zero inliers or invalid data defined by ``is_data_valid`` or invalid models defined by ``is_model_valid``. .. versionadded:: 0.19" - }, - { - "name": "stop_n_inliers", - "type": "int", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Stop iteration if at least this number of inliers are found." - }, - { - "name": "stop_score", - "type": "float", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Stop iteration if score is greater equal than this threshold." - }, - { - "name": "stop_probability", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "RANSAC iteration stops if at least one outlier-free set of the training data is sampled in RANSAC. This requires to generate at least N samples (iterations):: N >= log(1 - probability) / log(1 - e**m) where the probability (confidence) is typically set to high value such as 0.99 (the default) and e is the current fraction of inliers w.r.t. the total number of samples." - }, - { - "name": "loss", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'absolute_loss'", - "limitation": null, - "ignored": false, - "docstring": "String inputs, \"absolute_loss\" and \"squared_loss\" are supported which find the absolute loss and squared loss per sample respectively. 
If ``loss`` is a callable, then it should be a function that takes two arrays as inputs, the true and predicted value and returns a 1-D array with the i-th value of the array corresponding to the loss on ``X[i]``. If the loss on a sample is greater than the ``residual_threshold``, then this sample is classified as an outlier. .. versionadded:: 0.18" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The generator used to initialize the centers. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample raises error if sample_weight is passed and base_estimator fit method does not support it. .. versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit estimator using RANSAC algorithm.\n\nParameters\n----------\nX : array-like or sparse matrix, shape [n_samples, n_features]\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample\n raises error if sample_weight is passed and base_estimator\n fit method does not support it.\n\n .. 
versionadded:: 0.18\n\nRaises\n------\nValueError\n If no valid consensus set could be found. This occurs if\n `is_data_valid` and `is_model_valid` return False for all\n `max_trials` randomly chosen sub-samples." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the estimated model.\n\nThis is a wrapper for `estimator_.predict(X)`.\n\nParameters\n----------\nX : numpy array of shape [n_samples, n_features]\n\nReturns\n-------\ny : array, shape = [n_samples] or [n_samples, n_targets]\n Returns predicted values." - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the score of the prediction.\n\nThis is a wrapper for `estimator_.score(X, y)`.\n\nParameters\n----------\nX : numpy array or sparse matrix of shape [n_samples, n_features]\n Training data.\n\ny : array, shape = [n_samples] or [n_samples, n_targets]\n Target values.\n\nReturns\n-------\nz : float\n Score of the prediction." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "RANSAC (RANdom SAmple Consensus) algorithm.\n\nRANSAC is an iterative algorithm for the robust estimation of parameters\nfrom a subset of inliers from the complete data set.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nbase_estimator : object, default=None\n Base estimator object which implements the following methods:\n\n * `fit(X, y)`: Fit model to given training data and target values.\n * `score(X, y)`: Returns the mean accuracy on the given test data,\n which is used for the stop criterion defined by `stop_score`.\n Additionally, the score is used to decide which of two equally\n large consensus sets is chosen as the better one.\n * `predict(X)`: Returns predicted values using the linear model,\n which is used to compute residual error using loss function.\n\n If `base_estimator` is None, then\n :class:`~sklearn.linear_model.LinearRegression` is used for\n target values of dtype float.\n\n Note that the current implementation only supports regression\n estimators.\n\nmin_samples : int (>= 1) or float ([0, 1]), default=None\n Minimum number of samples chosen randomly from original data. Treated\n as an absolute number of samples for `min_samples >= 1`, treated as a\n relative number `ceil(min_samples * X.shape[0]`) for\n `min_samples < 1`. This is typically chosen as the minimal number of\n samples necessary to estimate the given `base_estimator`. 
By default a\n ``sklearn.linear_model.LinearRegression()`` estimator is assumed and\n `min_samples` is chosen as ``X.shape[1] + 1``.\n\nresidual_threshold : float, default=None\n Maximum residual for a data sample to be classified as an inlier.\n By default the threshold is chosen as the MAD (median absolute\n deviation) of the target values `y`.\n\nis_data_valid : callable, default=None\n This function is called with the randomly selected data before the\n model is fitted to it: `is_data_valid(X, y)`. If its return value is\n False the current randomly chosen sub-sample is skipped.\n\nis_model_valid : callable, default=None\n This function is called with the estimated model and the randomly\n selected data: `is_model_valid(model, X, y)`. If its return value is\n False the current randomly chosen sub-sample is skipped.\n Rejecting samples with this function is computationally costlier than\n with `is_data_valid`. `is_model_valid` should therefore only be used if\n the estimated model is needed for making the rejection decision.\n\nmax_trials : int, default=100\n Maximum number of iterations for random sample selection.\n\nmax_skips : int, default=np.inf\n Maximum number of iterations that can be skipped due to finding zero\n inliers or invalid data defined by ``is_data_valid`` or invalid models\n defined by ``is_model_valid``.\n\n .. versionadded:: 0.19\n\nstop_n_inliers : int, default=np.inf\n Stop iteration if at least this number of inliers are found.\n\nstop_score : float, default=np.inf\n Stop iteration if score is greater equal than this threshold.\n\nstop_probability : float in range [0, 1], default=0.99\n RANSAC iteration stops if at least one outlier-free set of the training\n data is sampled in RANSAC. 
This requires to generate at least N\n samples (iterations)::\n\n N >= log(1 - probability) / log(1 - e**m)\n\n where the probability (confidence) is typically set to high value such\n as 0.99 (the default) and e is the current fraction of inliers w.r.t.\n the total number of samples.\n\nloss : string, callable, default='absolute_loss'\n String inputs, \"absolute_loss\" and \"squared_loss\" are supported which\n find the absolute loss and squared loss per sample\n respectively.\n\n If ``loss`` is a callable, then it should be a function that takes\n two arrays as inputs, the true and predicted value and returns a 1-D\n array with the i-th value of the array corresponding to the loss\n on ``X[i]``.\n\n If the loss on a sample is greater than the ``residual_threshold``,\n then this sample is classified as an outlier.\n\n .. versionadded:: 0.18\n\nrandom_state : int, RandomState instance, default=None\n The generator used to initialize the centers.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nestimator_ : object\n Best fitted model (copy of the `base_estimator` object).\n\nn_trials_ : int\n Number of random selection trials until one of the stop criteria is\n met. It is always ``<= max_trials``.\n\ninlier_mask_ : bool array of shape [n_samples]\n Boolean mask of inliers classified as ``True``.\n\nn_skips_no_inliers_ : int\n Number of iterations skipped due to finding zero inliers.\n\n .. versionadded:: 0.19\n\nn_skips_invalid_data_ : int\n Number of iterations skipped due to invalid data defined by\n ``is_data_valid``.\n\n .. versionadded:: 0.19\n\nn_skips_invalid_model_ : int\n Number of iterations skipped due to an invalid model defined by\n ``is_model_valid``.\n\n .. versionadded:: 0.19\n\nExamples\n--------\n>>> from sklearn.linear_model import RANSACRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(\n... 
n_samples=200, n_features=2, noise=4.0, random_state=0)\n>>> reg = RANSACRegressor(random_state=0).fit(X, y)\n>>> reg.score(X, y)\n0.9885...\n>>> reg.predict(X[:1,])\narray([-31.9417...])\n\nReferences\n----------\n.. [1] https://en.wikipedia.org/wiki/RANSAC\n.. [2] https://www.sri.com/sites/default/files/publications/ransac-publication.pdf\n.. [3] http://www.bmva.org/bmvc/2009/Papers/Paper355/Paper355.pdf" - } - ], - "functions": [ - { - "name": "_dynamic_max_trials", - "decorators": [], - "parameters": [ - { - "name": "n_inliers", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of inliers in the data." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Total number of samples in the data." - }, - { - "name": "min_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Minimum number of samples chosen randomly from original data." - }, - { - "name": "probability", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Probability (confidence) that one outlier-free sample is generated." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Determine number trials such that at least one outlier-free subset is\nsampled for the given inlier/outlier ratio.\n\nParameters\n----------\nn_inliers : int\n Number of inliers in the data.\n\nn_samples : int\n Total number of samples in the data.\n\nmin_samples : int\n Minimum number of samples chosen randomly from original data.\n\nprobability : float\n Probability (confidence) that one outlier-free sample is generated.\n\nReturns\n-------\ntrials : int\n Number of trials." 
- } - ] - }, - { - "name": "sklearn.linear_model._ridge", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "import warnings", - "import numpy as np", - "from scipy import linalg", - "from scipy import sparse", - "from scipy.sparse import linalg as sp_linalg", - "from _base import LinearClassifierMixin", - "from _base import LinearModel", - "from _base import _rescale_data", - "from _sag import sag_solver", - "from base import RegressorMixin", - "from base import MultiOutputMixin", - "from base import is_classifier", - "from utils.extmath import safe_sparse_dot", - "from utils.extmath import row_norms", - "from utils import check_array", - "from utils import check_consistent_length", - "from utils import compute_sample_weight", - "from utils import column_or_1d", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from preprocessing import LabelBinarizer", - "from model_selection import GridSearchCV", - "from metrics import check_scoring", - "from exceptions import ConvergenceWarning", - "from utils.sparsefuncs import mean_variance_axis" - ], - "classes": [ - { - "name": "_BaseRidge", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "Ridge", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Regularization strength; must be a positive float. 
Regularization improves the conditioning of the problem and reduces the variance of the estimates. Larger values specify stronger regularization. Alpha corresponds to ``1 / (2C)`` in other linear models such as :class:`~sklearn.linear_model.LogisticRegression` or :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are assumed to be specific to the targets. Hence they must correspond in number." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to fit the intercept for this model. If set to false, no intercept will be used in calculations (i.e. ``X`` and ``y`` are expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for conjugate gradient solver. For 'sparse_cg' and 'lsqr' solvers, the default value is determined by scipy.sparse.linalg. For 'sag' solver, the default value is 1000." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Precision of the solution." 
- }, - { - "name": "solver", - "type": "Literal['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Solver to use in the computational routines: - 'auto' chooses the solver automatically based on the type of data. - 'svd' uses a Singular Value Decomposition of X to compute the Ridge coefficients. More stable for singular matrices than 'cholesky'. - 'cholesky' uses the standard scipy.linalg.solve function to obtain a closed-form solution. - 'sparse_cg' uses the conjugate gradient solver as found in scipy.sparse.linalg.cg. As an iterative algorithm, this solver is more appropriate than 'cholesky' for large-scale data (possibility to set `tol` and `max_iter`). - 'lsqr' uses the dedicated regularized least-squares routine scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative procedure. - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses its improved, unbiased version named SAGA. Both methods also use an iterative procedure, and are often faster than other solvers when both n_samples and n_features are large. Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. You can preprocess the data with a scaler from sklearn.preprocessing. All last five solvers support both dense and sparse data. However, only 'sag' and 'sparse_cg' supports sparse input when `fit_intercept` is True. .. versionadded:: 0.17 Stochastic Average Gradient descent solver. .. versionadded:: 0.19 SAGA solver." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``solver`` == 'sag' or 'saga' to shuffle the data. See :term:`Glossary ` for details. .. versionadded:: 0.17 `random_state` to support Stochastic Average Gradient." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "sample_weight", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample. If given a float, every sample will have the same weight." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Ridge regression model.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\nReturns\n-------\nself : returns an instance of self." - } - ], - "docstring": "Linear least squares with l2 regularization.\n\nMinimizes the objective function::\n\n||y - Xw||^2_2 + alpha * ||w||^2_2\n\nThis model solves a regression model where the loss function is\nthe linear least squares function and regularization is given by\nthe l2-norm. Also known as Ridge Regression or Tikhonov regularization.\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape (n_samples, n_targets)).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : {float, ndarray of shape (n_targets,)}, default=1.0\n Regularization strength; must be a positive float. 
Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n assumed to be specific to the targets. Hence they must correspond in\n number.\n\nfit_intercept : bool, default=True\n Whether to fit the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. ``X`` and ``y`` are expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=None\n Maximum number of iterations for conjugate gradient solver.\n For 'sparse_cg' and 'lsqr' solvers, the default value is determined\n by scipy.sparse.linalg. For 'sag' solver, the default value is 1000.\n\ntol : float, default=1e-3\n Precision of the solution.\n\nsolver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, default='auto'\n Solver to use in the computational routines:\n\n - 'auto' chooses the solver automatically based on the type of data.\n\n - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. More stable for singular matrices than 'cholesky'.\n\n - 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution.\n\n - 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. 
As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n - 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n procedure.\n\n - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its improved, unbiased version named SAGA. Both methods also use an\n iterative procedure, and are often faster than other solvers when\n both n_samples and n_features are large. Note that 'sag' and\n 'saga' fast convergence is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\n All last five solvers support both dense and sparse data. However, only\n 'sag' and 'sparse_cg' supports sparse input when `fit_intercept` is\n True.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n See :term:`Glossary ` for details.\n\n .. versionadded:: 0.17\n `random_state` to support Stochastic Average Gradient.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Weight vector(s).\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\nn_iter_ : None or ndarray of shape (n_targets,)\n Actual number of iterations for each target. Available only for\n sag and lsqr solvers. Other solvers will return None.\n\n .. 
versionadded:: 0.17\n\nSee Also\n--------\nRidgeClassifier : Ridge classifier.\nRidgeCV : Ridge regression with built-in cross validation.\n:class:`~sklearn.kernel_ridge.KernelRidge` : Kernel ridge regression\n combines ridge regression with the kernel trick.\n\nExamples\n--------\n>>> from sklearn.linear_model import Ridge\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> clf = Ridge(alpha=1.0)\n>>> clf.fit(X, y)\nRidge()" - }, - { - "name": "RidgeClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Regularization strength; must be a positive float. Regularization improves the conditioning of the problem and reduces the variance of the estimates. Larger values specify stronger regularization. Alpha corresponds to ``1 / (2C)`` in other linear models such as :class:`~sklearn.linear_model.LogisticRegression` or :class:`~sklearn.svm.LinearSVC`." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (e.g. data is expected to be already centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." 
- }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for conjugate gradient solver. The default value is determined by scipy.sparse.linalg." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Precision of the solution." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``." - }, - { - "name": "solver", - "type": "Literal['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Solver to use in the computational routines: - 'auto' chooses the solver automatically based on the type of data. - 'svd' uses a Singular Value Decomposition of X to compute the Ridge coefficients. More stable for singular matrices than 'cholesky'. - 'cholesky' uses the standard scipy.linalg.solve function to obtain a closed-form solution. - 'sparse_cg' uses the conjugate gradient solver as found in scipy.sparse.linalg.cg. As an iterative algorithm, this solver is more appropriate than 'cholesky' for large-scale data (possibility to set `tol` and `max_iter`). 
- 'lsqr' uses the dedicated regularized least-squares routine scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative procedure. - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses its unbiased and more flexible version named SAGA. Both methods use an iterative procedure, and are often faster than other solvers when both n_samples and n_features are large. Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. You can preprocess the data with a scaler from sklearn.preprocessing. .. versionadded:: 0.17 Stochastic Average Gradient descent solver. .. versionadded:: 0.19 SAGA solver." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``solver`` == 'sag' or 'saga' to shuffle the data. See :term:`Glossary ` for details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample. If given a float, every sample will have the same weight. .. versionadded:: 0.17 *sample_weight* support to Classifier." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Ridge classifier model.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Target values.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\n .. versionadded:: 0.17\n *sample_weight* support to Classifier.\n\nReturns\n-------\nself : object\n Instance of the estimator." - }, - { - "name": "classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Classifier using Ridge regression.\n\nThis classifier first converts the target values into ``{-1, 1}`` and\nthen treats the problem as a regression task (multi-output regression in\nthe multiclass case).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set to false, no\n intercept will be used in calculations (e.g. 
data is expected to be\n already centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=None\n Maximum number of iterations for conjugate gradient solver.\n The default value is determined by scipy.sparse.linalg.\n\ntol : float, default=1e-3\n Precision of the solution.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\nsolver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, default='auto'\n Solver to use in the computational routines:\n\n - 'auto' chooses the solver automatically based on the type of data.\n\n - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. More stable for singular matrices than 'cholesky'.\n\n - 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution.\n\n - 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n - 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. 
It is the fastest and uses an iterative\n procedure.\n\n - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its unbiased and more flexible version named SAGA. Both methods\n use an iterative procedure, and are often faster than other solvers\n when both n_samples and n_features are large. Note that 'sag' and\n 'saga' fast convergence is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n See :term:`Glossary ` for details.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n Coefficient of the features in the decision function.\n\n ``coef_`` is of shape (1, n_features) when the given problem is binary.\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\nn_iter_ : None or ndarray of shape (n_targets,)\n Actual number of iterations for each target. Available only for\n sag and lsqr solvers. Other solvers will return None.\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\nSee Also\n--------\nRidge : Ridge regression.\nRidgeClassifierCV : Ridge classifier with built-in cross validation.\n\nNotes\n-----\nFor multi-class classification, n_class classifiers are trained in\na one-versus-all approach. Concretely, this is implemented by taking\nadvantage of the multi-variate response support in Ridge.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.linear_model import RidgeClassifier\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> clf = RidgeClassifier().fit(X, y)\n>>> clf.score(X, y)\n0.9595..." 
- }, - { - "name": "_X_CenterStackOp", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_matvec", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_matmat", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_transpose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Behaves as centered and scaled X with an added intercept column.\n\nThis operator behaves as\nnp.hstack([X - sqrt_sw[:, None] * X_mean, sqrt_sw[:, None]])" - }, - { - "name": "_XT_CenterStackOp", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_matvec", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_matmat", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Behaves as transposed centered and scaled X with an intercept column.\n\nThis operator behaves as\nnp.hstack([X - sqrt_sw[:, None] * X_mean, sqrt_sw[:, None]]).T" - }, - { - "name": "_IdentityRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Fake regressor which will directly output the prediction." - }, - { - "name": "_IdentityClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Fake classifier which will directly output the prediction.\n\nWe inherit from LinearClassifierMixin to get the proper shape for the\noutput `y`." - }, - { - "name": "_RidgeGCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_decomp_diag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_diag_dot", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_compute_gram", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The preprocessed design matrix." 
- }, - { - "name": "sqrt_sw", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "square roots of sample weights" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the Gram matrix XX^T with possible centering.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The preprocessed design matrix.\n\nsqrt_sw : ndarray of shape (n_samples,)\n square roots of sample weights\n\nReturns\n-------\ngram : ndarray of shape (n_samples, n_samples)\n The Gram matrix.\nX_mean : ndarray of shape (n_feature,)\n The weighted mean of ``X`` for each feature.\n\nNotes\n-----\nWhen X is dense the centering has been done in preprocessing\nso the mean is 0 and we just compute XX^T.\n\nWhen X is sparse it has not been centered in preprocessing, but it has\nbeen scaled by sqrt(sample weights).\n\nWhen self.fit_intercept is False no centering is done.\n\nThe centered X is never actually computed because centering would break\nthe sparsity of X." - }, - { - "name": "_compute_covariance", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The preprocessed design matrix." 
- }, - { - "name": "sqrt_sw", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "square roots of sample weights" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes covariance matrix X^TX with possible centering.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n The preprocessed design matrix.\n\nsqrt_sw : ndarray of shape (n_samples,)\n square roots of sample weights\n\nReturns\n-------\ncovariance : ndarray of shape (n_features, n_features)\n The covariance matrix.\nX_mean : ndarray of shape (n_feature,)\n The weighted mean of ``X`` for each feature.\n\nNotes\n-----\nSince X is sparse it has not been centered in preprocessing, but it has\nbeen scaled by sqrt(sample weights).\n\nWhen self.fit_intercept is False no centering is done.\n\nThe centered X is never actually computed because centering would break\nthe sparsity of X." - }, - { - "name": "_sparse_multidot_diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "A", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "X_mean", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "sqrt_sw", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "square roots of sample weights" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the diagonal of (X - X_mean).dot(A).dot((X - X_mean).T)\nwithout explicitely centering X nor computing X.dot(A)\nwhen X is sparse.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n\nA : ndarray of shape 
(n_features, n_features)\n\nX_mean : ndarray of shape (n_features,)\n\nsqrt_sw : ndarray of shape (n_features,)\n square roots of sample weights\n\nReturns\n-------\ndiag : np.ndarray, shape (n_samples,)\n The computed diagonal." - }, - { - "name": "_eigen_decompose_gram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Eigendecomposition of X.X^T, used when n_samples <= n_features." - }, - { - "name": "_solve_eigen_gram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have a decomposition of X.X^T (n_samples <= n_features)." - }, - { - "name": "_eigen_decompose_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Eigendecomposition of X^T.X, used when n_samples > n_features\nand X is sparse." - }, - { - "name": "_solve_eigen_covariance_no_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have a decomposition of X^T.X\n(n_samples > n_features and X is sparse), and not fitting an intercept." - }, - { - "name": "_solve_eigen_covariance_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have a decomposition of X^T.X\n(n_samples > n_features and X is sparse),\nand we are fitting an intercept." - }, - { - "name": "_solve_eigen_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have a decomposition of X^T.X\n(n_samples > n_features and X is sparse)." 
- }, - { - "name": "_svd_decompose_design_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_solve_svd_design_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have an SVD decomposition of X\n(n_samples > n_features and X is dense)." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. Will be cast to float64 if necessary." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. Will be cast to float64 if necessary." - }, - { - "name": "sample_weight", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample. If given a float, every sample will have the same weight." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Ridge regression model with gcv.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data. Will be cast to float64 if necessary.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to float64 if necessary.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\nReturns\n-------\nself : object" - } - ], - "docstring": "Ridge regression with built-in Leave-one-out Cross-Validation.\n\nThis class is not intended to be used directly. 
Use RidgeCV instead.\n\nNotes\n-----\n\nWe want to solve (K + alpha*Id)c = y,\nwhere K = X X^T is the kernel matrix.\n\nLet G = (K + alpha*Id).\n\nDual solution: c = G^-1y\nPrimal solution: w = X^T c\n\nCompute eigendecomposition K = Q V Q^T.\nThen G^-1 = Q (V + alpha*Id)^-1 Q^T,\nwhere (V + alpha*Id) is diagonal.\nIt is thus inexpensive to inverse for many alphas.\n\nLet loov be the vector of prediction values for each example\nwhen the model was fitted with all examples but this example.\n\nloov = (KG^-1Y - diag(KG^-1)Y) / diag(I-KG^-1)\n\nLet looe be the vector of prediction errors for each example\nwhen the model was fitted with all examples but this example.\n\nlooe = y - loov = c / diag(G^-1)\n\nThe best score (negative mean squared error or user-provided scoring) is\nstored in the `best_score_` attribute, and the selected hyperparameter in\n`alpha_`.\n\nReferences\n----------\nhttp://cbcl.mit.edu/publications/ps/MIT-CSAIL-TR-2007-025.pdf\nhttps://www.mit.edu/~9.520/spring07/Classes/rlsslides.pdf" - }, - { - "name": "_BaseRidgeCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. If using GCV, will be cast to float64 if necessary." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. Will be cast to X's dtype if necessary." - }, - { - "name": "sample_weight", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample. If given a float, every sample will have the same weight." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Ridge regression model with cv.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Training data. If using GCV, will be cast to float64\n if necessary.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to X's dtype if necessary.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\nReturns\n-------\nself : object\n\nNotes\n-----\nWhen sample_weight is provided, the selected hyperparameter may depend\non whether we use leave-one-out cross-validation (cv=None or cv='auto')\nor another form of cross-validation, because only leave-one-out\ncross-validation takes the sample weights into account when computing\nthe validation score." - } - ], - "docstring": null - }, - { - "name": "RidgeCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alphas", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of alpha values to try. Regularization strength; must be a positive float. Regularization improves the conditioning of the problem and reduces the variance of the estimates. Larger values specify stronger regularization. Alpha corresponds to ``1 / (2C)`` in other linear models such as :class:`~sklearn.linear_model.LogisticRegression` or :class:`~sklearn.svm.LinearSVC`. If using Leave-One-Out cross-validation, alphas must be positive." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." 
- }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. If None, the negative mean squared error if cv is 'auto' or None (i.e. when using leave-one-out cross-validation), and r2 score otherwise." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the efficient Leave-One-Out cross-validation - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if ``y`` is binary or multiclass, :class:`~sklearn.model_selection.StratifiedKFold` is used, else, :class:`~sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here." - }, - { - "name": "gcv_mode", - "type": "Literal['auto', 'svd']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Flag indicating which strategy to use when performing Leave-One-Out Cross-Validation. 
Options are:: 'auto' : use 'svd' if n_samples > n_features, otherwise use 'eigen' 'svd' : force use of singular value decomposition of X when X is dense, eigenvalue decomposition of X^T.X when X is sparse. 'eigen' : force computation via eigendecomposition of X.X^T The 'auto' mode is the default and is intended to pick the cheaper option of the two depending on the shape of the training data." - }, - { - "name": "store_cv_values", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Flag indicating if the cross-validation values corresponding to each alpha should be stored in the ``cv_values_`` attribute (see below). This flag is only compatible with ``cv=None`` (i.e. using Leave-One-Out Cross-Validation)." - }, - { - "name": "alpha_per_target", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Flag indicating whether to optimize the alpha value (picked from the `alphas` parameter list) for each target separately (for multi-output settings: multiple prediction targets). When set to `True`, after fitting, the `alpha_` attribute will contain a value for each target. When set to `False`, a single alpha is used for all targets. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Ridge regression with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nBy default, it performs efficient Leave-One-Out Cross-Validation.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalphas : ndarray of shape (n_alphas,), default=(0.1, 1.0, 10.0)\n Array of alpha values to try.\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. 
Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`.\n If using Leave-One-Out cross-validation, alphas must be positive.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nscoring : string, callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n If None, the negative mean squared error if cv is 'auto' or None\n (i.e. when using leave-one-out cross-validation), and r2 score\n otherwise.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the efficient Leave-One-Out cross-validation\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if ``y`` is binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used, else,\n :class:`~sklearn.model_selection.KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\ngcv_mode : {'auto', 'svd', eigen'}, default='auto'\n Flag indicating which strategy to use when performing\n Leave-One-Out Cross-Validation. 
Options are::\n\n 'auto' : use 'svd' if n_samples > n_features, otherwise use 'eigen'\n 'svd' : force use of singular value decomposition of X when X is\n dense, eigenvalue decomposition of X^T.X when X is sparse.\n 'eigen' : force computation via eigendecomposition of X.X^T\n\n The 'auto' mode is the default and is intended to pick the cheaper\n option of the two depending on the shape of the training data.\n\nstore_cv_values : bool, default=False\n Flag indicating if the cross-validation values corresponding to\n each alpha should be stored in the ``cv_values_`` attribute (see\n below). This flag is only compatible with ``cv=None`` (i.e. using\n Leave-One-Out Cross-Validation).\n\nalpha_per_target : bool, default=False\n Flag indicating whether to optimize the alpha value (picked from the\n `alphas` parameter list) for each target separately (for multi-output\n settings: multiple prediction targets). When set to `True`, after\n fitting, the `alpha_` attribute will contain a value for each target.\n When set to `False`, a single alpha is used for all targets.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncv_values_ : ndarray of shape (n_samples, n_alphas) or shape (n_samples, n_targets, n_alphas), optional\n Cross-validation values for each alpha (only available if\n ``store_cv_values=True`` and ``cv=None``). After ``fit()`` has been\n called, this attribute will contain the mean squared errors\n (by default) or the values of the ``{loss,score}_func`` function\n (if provided in the constructor).\n\ncoef_ : ndarray of shape (n_features) or (n_targets, n_features)\n Weight vector(s).\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function. 
Set to 0.0 if\n ``fit_intercept = False``.\n\nalpha_ : float or ndarray of shape (n_targets,)\n Estimated regularization parameter, or, if ``alpha_per_target=True``,\n the estimated regularization parameter for each target.\n\nbest_score_ : float or ndarray of shape (n_targets,)\n Score of base estimator with best alpha, or, if\n ``alpha_per_target=True``, a score for each target.\n\n .. versionadded:: 0.23\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.linear_model import RidgeCV\n>>> X, y = load_diabetes(return_X_y=True)\n>>> clf = RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)\n>>> clf.score(X, y)\n0.5166...\n\nSee Also\n--------\nRidge : Ridge regression.\nRidgeClassifier : Ridge classifier.\nRidgeClassifierCV : Ridge classifier with built-in cross validation." - }, - { - "name": "RidgeClassifierCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alphas", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of alpha values to try. Regularization strength; must be a positive float. Regularization improves the conditioning of the problem and reduces the variance of the estimates. Larger values specify stronger regularization. Alpha corresponds to ``1 / (2C)`` in other linear models such as :class:`~sklearn.linear_model.LogisticRegression` or :class:`~sklearn.svm.LinearSVC`." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." 
- }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the efficient Leave-One-Out cross-validation - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. 
The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``" - }, - { - "name": "store_cv_values", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Flag indicating if the cross-validation values corresponding to each alpha should be stored in the ``cv_values_`` attribute (see below). This flag is only compatible with ``cv=None`` (i.e. using Leave-One-Out Cross-Validation)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features. When using GCV, will be cast to float64 if necessary." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. Will be cast to X's dtype if necessary." - }, - { - "name": "sample_weight", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample. If given a float, every sample will have the same weight." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Ridge classifier with cv.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples\n and n_features is the number of features. When using GCV,\n will be cast to float64 if necessary.\n\ny : ndarray of shape (n_samples,)\n Target values. 
Will be cast to X's dtype if necessary.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\nReturns\n-------\nself : object" - }, - { - "name": "classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Ridge classifier with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nBy default, it performs Leave-One-Out Cross-Validation. Currently,\nonly the n_features > n_samples case is handled efficiently.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalphas : ndarray of shape (n_alphas,), default=(0.1, 1.0, 10.0)\n Array of alpha values to try.\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nscoring : string, callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the efficient Leave-One-Out cross-validation\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\nstore_cv_values : bool, default=False\n Flag indicating if the cross-validation values corresponding to\n each alpha should be stored in the ``cv_values_`` attribute (see\n below). This flag is only compatible with ``cv=None`` (i.e. using\n Leave-One-Out Cross-Validation).\n\nAttributes\n----------\ncv_values_ : ndarray of shape (n_samples, n_targets, n_alphas), optional\n Cross-validation values for each alpha (if ``store_cv_values=True`` and\n ``cv=None``). 
After ``fit()`` has been called, this attribute will\n contain the mean squared errors (by default) or the values of the\n ``{loss,score}_func`` function (if provided in the constructor). This\n attribute exists only when ``store_cv_values`` is True.\n\ncoef_ : ndarray of shape (1, n_features) or (n_targets, n_features)\n Coefficient of the features in the decision function.\n\n ``coef_`` is of shape (1, n_features) when the given problem is binary.\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\nalpha_ : float\n Estimated regularization parameter.\n\nbest_score_ : float\n Score of base estimator with best alpha.\n\n .. versionadded:: 0.23\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.linear_model import RidgeClassifierCV\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> clf = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)\n>>> clf.score(X, y)\n0.9630...\n\nSee Also\n--------\nRidge : Ridge regression.\nRidgeClassifier : Ridge classifier.\nRidgeCV : Ridge regression with built-in cross validation.\n\nNotes\n-----\nFor multi-class classification, n_class classifiers are trained in\na one-versus-all approach. Concretely, this is implemented by taking\nadvantage of the multi-variate response support in Ridge." 
- } - ], - "functions": [ - { - "name": "_solve_sparse_cg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_solve_lsqr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_solve_cholesky", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_solve_cholesky_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_solve_svd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_valid_accept_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "ridge_regression", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "alpha", - "type": "Union[ArrayLike, float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regularization strength; must be a positive float. Regularization improves the conditioning of the problem and reduces the variance of the estimates. Larger values specify stronger regularization. Alpha corresponds to ``1 / (2C)`` in other linear models such as :class:`~sklearn.linear_model.LogisticRegression` or :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are assumed to be specific to the targets. Hence they must correspond in number." 
- }, - { - "name": "sample_weight", - "type": "Union[ArrayLike, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample. If given a float, every sample will have the same weight. If sample_weight is not None and solver='auto', the solver will be set to 'cholesky'. .. versionadded:: 0.17" - }, - { - "name": "solver", - "type": "Literal['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Solver to use in the computational routines: - 'auto' chooses the solver automatically based on the type of data. - 'svd' uses a Singular Value Decomposition of X to compute the Ridge coefficients. More stable for singular matrices than 'cholesky'. - 'cholesky' uses the standard scipy.linalg.solve function to obtain a closed-form solution via a Cholesky decomposition of dot(X.T, X) - 'sparse_cg' uses the conjugate gradient solver as found in scipy.sparse.linalg.cg. As an iterative algorithm, this solver is more appropriate than 'cholesky' for large-scale data (possibility to set `tol` and `max_iter`). - 'lsqr' uses the dedicated regularized least-squares routine scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative procedure. - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses its improved, unbiased version named SAGA. Both methods also use an iterative procedure, and are often faster than other solvers when both n_samples and n_features are large. Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. You can preprocess the data with a scaler from sklearn.preprocessing. All last five solvers support both dense and sparse data. However, only 'sag' and 'sparse_cg' supports sparse input when `fit_intercept` is True. .. versionadded:: 0.17 Stochastic Average Gradient descent solver. .. 
versionadded:: 0.19 SAGA solver." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for conjugate gradient solver. For the 'sparse_cg' and 'lsqr' solvers, the default value is determined by scipy.sparse.linalg. For 'sag' and saga solver, the default value is 1000." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Precision of the solution." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level. Setting verbose > 0 will display additional information depending on the solver used." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``solver`` == 'sag' or 'saga' to shuffle the data. See :term:`Glossary ` for details." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the method also returns `n_iter`, the actual number of iteration performed by the solver. .. versionadded:: 0.17" - }, - { - "name": "return_intercept", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True and if X is sparse, the method also returns the intercept, and the solver is automatically changed to 'sag'. This is only a temporary fix for fitting the intercept with sparse data. For dense data, use sklearn.linear_model._preprocess_data before your regression. .. 
versionadded:: 0.17" - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, the input arrays X and y will not be checked. .. versionadded:: 0.21" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Solve the ridge equation by the method of normal equations.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {ndarray, sparse matrix, LinearOperator} of shape (n_samples, n_features)\n Training data\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\nalpha : float or array-like of shape (n_targets,)\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n assumed to be specific to the targets. Hence they must correspond in\n number.\n\nsample_weight : float or array-like of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight. If sample_weight is not None and\n solver='auto', the solver will be set to 'cholesky'.\n\n .. versionadded:: 0.17\n\nsolver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, default='auto'\n Solver to use in the computational routines:\n\n - 'auto' chooses the solver automatically based on the type of data.\n\n - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. 
More stable for singular matrices than 'cholesky'.\n\n - 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution via a Cholesky decomposition of\n dot(X.T, X)\n\n - 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n - 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n procedure.\n\n - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its improved, unbiased version named SAGA. Both methods also use an\n iterative procedure, and are often faster than other solvers when\n both n_samples and n_features are large. Note that 'sag' and\n 'saga' fast convergence is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\n\n All last five solvers support both dense and sparse data. However, only\n 'sag' and 'sparse_cg' supports sparse input when `fit_intercept` is\n True.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n\nmax_iter : int, default=None\n Maximum number of iterations for conjugate gradient solver.\n For the 'sparse_cg' and 'lsqr' solvers, the default value is determined\n by scipy.sparse.linalg. For 'sag' and saga solver, the default value is\n 1000.\n\ntol : float, default=1e-3\n Precision of the solution.\n\nverbose : int, default=0\n Verbosity level. 
Setting verbose > 0 will display additional\n information depending on the solver used.\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n See :term:`Glossary ` for details.\n\nreturn_n_iter : bool, default=False\n If True, the method also returns `n_iter`, the actual number of\n iteration performed by the solver.\n\n .. versionadded:: 0.17\n\nreturn_intercept : bool, default=False\n If True and if X is sparse, the method also returns the intercept,\n and the solver is automatically changed to 'sag'. This is only a\n temporary fix for fitting the intercept with sparse data. For dense\n data, use sklearn.linear_model._preprocess_data before your regression.\n\n .. versionadded:: 0.17\n\ncheck_input : bool, default=True\n If False, the input arrays X and y will not be checked.\n\n .. versionadded:: 0.21\n\nReturns\n-------\ncoef : ndarray of shape (n_features,) or (n_targets, n_features)\n Weight vector(s).\n\nn_iter : int, optional\n The actual number of iteration performed by the solver.\n Only returned if `return_n_iter` is True.\n\nintercept : float or ndarray of shape (n_targets,)\n The intercept of the model. Only returned if `return_intercept`\n is True and if X is a scipy sparse array.\n\nNotes\n-----\nThis function won't compute the intercept." - }, - { - "name": "_ridge_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_gcv_mode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_find_smallest_angle", - "decorators": [], - "parameters": [ - { - "name": "query", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Normalized query vector." 
- }, - { - "name": "vectors", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Vectors to which we compare query, as columns. Must be normalized." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the column of vectors that is most aligned with the query.\n\nBoth query and the columns of vectors must have their l2 norm equal to 1.\n\nParameters\n----------\nquery : ndarray of shape (n_samples,)\n Normalized query vector.\n\nvectors : ndarray of shape (n_samples, n_features)\n Vectors to which we compare query, as columns. Must be normalized." - } - ] - }, - { - "name": "sklearn.linear_model._sag", - "imports": [ - "import warnings", - "import numpy as np", - "from _base import make_dataset", - "from _sag_fast import sag32", - "from _sag_fast import sag64", - "from exceptions import ConvergenceWarning", - "from utils import check_array", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from utils.extmath import row_norms" - ], - "classes": [], - "functions": [ - { - "name": "get_auto_step_size", - "decorators": [], - "parameters": [ - { - "name": "max_squared_sum", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum squared sum of X over samples." - }, - { - "name": "alpha_scaled", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the regularization term, scaled by 1. / n_samples, the number of samples." - }, - { - "name": "loss", - "type": "Literal['log', 'squared', 'multinomial']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The loss function used in SAG solver." 
- }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies if a constant (a.k.a. bias or intercept) will be added to the decision function." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of rows in X. Useful if is_saga=True." - }, - { - "name": "is_saga", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return step size for the SAGA algorithm or the SAG algorithm." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute automatic step size for SAG solver.\n\nThe step size is set to 1 / (alpha_scaled + L + fit_intercept) where L is\nthe max sum of squares for over all samples.\n\nParameters\n----------\nmax_squared_sum : float\n Maximum squared sum of X over samples.\n\nalpha_scaled : float\n Constant that multiplies the regularization term, scaled by\n 1. / n_samples, the number of samples.\n\nloss : {'log', 'squared', 'multinomial'}\n The loss function used in SAG solver.\n\nfit_intercept : bool\n Specifies if a constant (a.k.a. bias or intercept) will be\n added to the decision function.\n\nn_samples : int, default=None\n Number of rows in X. Useful if is_saga=True.\n\nis_saga : bool, default=False\n Whether to return step size for the SAGA algorithm or the SAG\n algorithm.\n\nReturns\n-------\nstep_size : float\n Step size used in SAG solver.\n\nReferences\n----------\nSchmidt, M., Roux, N. L., & Bach, F. (2013).\nMinimizing finite sums with the stochastic average gradient\nhttps://hal.inria.fr/hal-00860051/document\n\nDefazio, A., Bach F. & Lacoste-Julien S. 
(2014).\nSAGA: A Fast Incremental Gradient Method With Support\nfor Non-Strongly Convex Composite Objectives\nhttps://arxiv.org/abs/1407.0202" - }, - { - "name": "sag_solver", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. With loss='multinomial', y must be label encoded (see preprocessing.LabelEncoder)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted)." - }, - { - "name": "loss", - "type": "Literal['log', 'squared', 'multinomial']", - "hasDefault": true, - "default": "'log'", - "limitation": null, - "ignored": false, - "docstring": "Loss function that will be optimized: -'log' is the binary logistic loss, as used in LogisticRegression. -'squared' is the squared loss, as used in Ridge. -'multinomial' is the multinomial logistic loss, as used in LogisticRegression. .. versionadded:: 0.18 *loss='multinomial'*" - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "L2 regularization term in the objective function ``(0.5 * alpha * || W ||_F^2)``." - }, - { - "name": "beta", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L1 regularization term in the objective function ``(beta * || W ||_1)``. Only applied if ``is_saga`` is set to True." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The max number of passes over the training data if the stopping criteria is not reached." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The stopping criteria for the weights. The iterations will stop when max(change in weights) / max(weights) < tol." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when shuffling the data. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, the input arrays X and y will not be checked." - }, - { - "name": "max_squared_sum", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum squared sum of X over samples. If None, it will be computed, going through all the samples. The value should be precomputed to speed up cross validation." - }, - { - "name": "warm_start_mem", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initialization parameters used for warm starting. Warm starting is currently used in LogisticRegression but not in Ridge. It contains: - 'coef': the weight vector, with the intercept in last line if the intercept is fitted. - 'gradient_memory': the scalar gradient for all seen samples. 
- 'sum_gradient': the sum of gradient over all seen samples, for each feature. - 'intercept_sum_gradient': the sum of gradient over all seen samples, for the intercept. - 'seen': array of boolean describing the seen samples. - 'num_seen': the number of seen samples." - }, - { - "name": "is_saga", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use the SAGA algorithm or the SAG algorithm. SAGA behaves better in the first epochs, and allow for l1 regularisation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "SAG solver for Ridge and LogisticRegression.\n\nSAG stands for Stochastic Average Gradient: the gradient of the loss is\nestimated each sample at a time and the model is updated along the way with\na constant learning rate.\n\nIMPORTANT NOTE: 'sag' solver converges faster on columns that are on the\nsame scale. You can normalize the data by using\nsklearn.preprocessing.StandardScaler on your data before passing it to the\nfit method.\n\nThis implementation works with data represented as dense numpy arrays or\nsparse scipy arrays of floating point values for the features. It will\nfit the data according to squared loss or log loss.\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using the squared euclidean norm L2.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Target values. With loss='multinomial', y must be label encoded\n (see preprocessing.LabelEncoder).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. 
for unweighted).\n\nloss : {'log', 'squared', 'multinomial'}, default='log'\n Loss function that will be optimized:\n -'log' is the binary logistic loss, as used in LogisticRegression.\n -'squared' is the squared loss, as used in Ridge.\n -'multinomial' is the multinomial logistic loss, as used in\n LogisticRegression.\n\n .. versionadded:: 0.18\n *loss='multinomial'*\n\nalpha : float, default=1.\n L2 regularization term in the objective function\n ``(0.5 * alpha * || W ||_F^2)``.\n\nbeta : float, default=0.\n L1 regularization term in the objective function\n ``(beta * || W ||_1)``. Only applied if ``is_saga`` is set to True.\n\nmax_iter : int, default=1000\n The max number of passes over the training data if the stopping\n criteria is not reached.\n\ntol : double, default=0.001\n The stopping criteria for the weights. The iterations will stop when\n max(change in weights) / max(weights) < tol.\n\nverbose : int, default=0\n The verbosity level.\n\nrandom_state : int, RandomState instance or None, default=None\n Used when shuffling the data. Pass an int for reproducible output\n across multiple function calls.\n See :term:`Glossary `.\n\ncheck_input : bool, default=True\n If False, the input arrays X and y will not be checked.\n\nmax_squared_sum : float, default=None\n Maximum squared sum of X over samples. If None, it will be computed,\n going through all the samples. The value should be precomputed\n to speed up cross validation.\n\nwarm_start_mem : dict, default=None\n The initialization parameters used for warm starting. 
Warm starting is\n currently used in LogisticRegression but not in Ridge.\n It contains:\n - 'coef': the weight vector, with the intercept in last line\n if the intercept is fitted.\n - 'gradient_memory': the scalar gradient for all seen samples.\n - 'sum_gradient': the sum of gradient over all seen samples,\n for each feature.\n - 'intercept_sum_gradient': the sum of gradient over all seen\n samples, for the intercept.\n - 'seen': array of boolean describing the seen samples.\n - 'num_seen': the number of seen samples.\n\nis_saga : bool, default=False\n Whether to use the SAGA algorithm or the SAG algorithm. SAGA behaves\n better in the first epochs, and allow for l1 regularisation.\n\nReturns\n-------\ncoef_ : ndarray of shape (n_features,)\n Weight vector.\n\nn_iter_ : int\n The number of full pass on all samples.\n\nwarm_start_mem : dict\n Contains a 'coef' key with the fitted result, and possibly the\n fitted intercept at the end of the array. Contains also other keys\n used for warm starting.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import linear_model\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> X = rng.randn(n_samples, n_features)\n>>> y = rng.randn(n_samples)\n>>> clf = linear_model.Ridge(solver='sag')\n>>> clf.fit(X, y)\nRidge(solver='sag')\n\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> y = np.array([1, 1, 2, 2])\n>>> clf = linear_model.LogisticRegression(\n... solver='sag', multi_class='multinomial')\n>>> clf.fit(X, y)\nLogisticRegression(multi_class='multinomial', solver='sag')\n\nReferences\n----------\nSchmidt, M., Roux, N. L., & Bach, F. (2013).\nMinimizing finite sums with the stochastic average gradient\nhttps://hal.inria.fr/hal-00860051/document\n\nDefazio, A., Bach F. & Lacoste-Julien S. 
(2014).\nSAGA: A Fast Incremental Gradient Method With Support\nfor Non-Strongly Convex Composite Objectives\nhttps://arxiv.org/abs/1407.0202\n\nSee Also\n--------\nRidge, SGDRegressor, ElasticNet, Lasso, SVR,\nLogisticRegression, SGDClassifier, LinearSVC, Perceptron" - } - ] - }, - { - "name": "sklearn.linear_model._stochastic_gradient", - "imports": [ - "import numpy as np", - "import warnings", - "from abc import ABCMeta", - "from abc import abstractmethod", - "from joblib import Parallel", - "from base import clone", - "from base import is_classifier", - "from _base import LinearClassifierMixin", - "from _base import SparseCoefMixin", - "from _base import make_dataset", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from utils import check_array", - "from utils import check_random_state", - "from utils import check_X_y", - "from utils.extmath import safe_sparse_dot", - "from utils.multiclass import _check_partial_fit_first_call", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from exceptions import ConvergenceWarning", - "from model_selection import StratifiedShuffleSplit", - "from model_selection import ShuffleSplit", - "from _sgd_fast import _plain_sgd", - "from utils import compute_class_weight", - "from _sgd_fast import Hinge", - "from _sgd_fast import SquaredHinge", - "from _sgd_fast import Log", - "from _sgd_fast import ModifiedHuber", - "from _sgd_fast import SquaredLoss", - "from _sgd_fast import Huber", - "from _sgd_fast import EpsilonInsensitive", - "from _sgd_fast import SquaredEpsilonInsensitive", - "from utils.fixes import _joblib_parallel_args", - "from utils import deprecated" - ], - "classes": [ - { - "name": "_ValidationScoreCallback", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Callback for early stopping based on validation score" - }, - { - "name": "BaseSGD", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [ - { - "name": "**kwargs", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator parameters." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Set and validate the parameters of estimator.\n\nParameters\n----------\n**kwargs : dict\n Estimator parameters.\n\nReturns\n-------\nself : object\n Estimator instance." - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit model." - }, - { - "name": "_validate_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate input params. " - }, - { - "name": "_get_loss_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get concrete ``LossFunction`` object for str ``loss``. 
" - }, - { - "name": "_get_learning_rate_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_penalty_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_allocate_parameter_mem", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Allocate mem for parameters; initialize if provided." - }, - { - "name": "_make_validation_split", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Split the dataset between training set and validation set.\n\nParameters\n----------\ny : ndarray of shape (n_samples, )\n Target values.\n\nReturns\n-------\nvalidation_mask : ndarray of shape (n_samples, )\n Equal to 1 on the validation set, 0 on the training set." - }, - { - "name": "_make_validation_score_cb", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "standard_coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "standard_intercept_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "average_coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "average_intercept_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for SGD classification and regression." 
- }, - { - "name": "BaseSGDClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit a binary classifier on X and y. " - }, - { - "name": "_fit_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit a multi-class classifier by combining binary classifiers\n\nEach binary classifier predicts one class versus all others. This\nstrategy is called OvA (One versus All) or OvR (One versus Rest)." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Subset of the training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Subset of the target values." - }, - { - "name": "classes", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Classes across all calls to partial_fit. Can be obtained by via `np.unique(y_all)`, where y_all is the target vector of the entire dataset. This argument is required for the first call to partial_fit and can be omitted in the subsequent calls. Note that y doesn't need to contain all labels in `classes`." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples. If not provided, uniform weights are assumed." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Perform one epoch of stochastic gradient descent on given samples.\n\nInternally, this method uses ``max_iter = 1``. Therefore, it is not\nguaranteed that a minimum of the cost function is reached after calling\nit once. Matters such as objective convergence and early stopping\nshould be handled by the user.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Subset of the training data.\n\ny : ndarray of shape (n_samples,)\n Subset of the target values.\n\nclasses : ndarray of shape (n_classes,), default=None\n Classes across all calls to partial_fit.\n Can be obtained by via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\nsample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples.\n If not provided, uniform weights are assumed.\n\nReturns\n-------\nself :\n Returns an instance of self." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." 
- }, - { - "name": "coef_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial coefficients to warm-start the optimization." - }, - { - "name": "intercept_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial intercept to warm-start the optimization." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples. If not provided, uniform weights are assumed. These weights will be multiplied with class_weight (passed through the constructor) if class_weight is specified." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit linear model with Stochastic Gradient Descent.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Target values.\n\ncoef_init : ndarray of shape (n_classes, n_features), default=None\n The initial coefficients to warm-start the optimization.\n\nintercept_init : ndarray of shape (n_classes,), default=None\n The initial intercept to warm-start the optimization.\n\nsample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples.\n If not provided, uniform weights are assumed. These weights will\n be multiplied with class_weight (passed through the\n constructor) if class_weight is specified.\n\nReturns\n-------\nself :\n Returns an instance of self." 
- } - ], - "docstring": null - }, - { - "name": "SGDClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "loss", - "type": "str", - "hasDefault": true, - "default": "'hinge'", - "limitation": null, - "ignored": false, - "docstring": "The loss function to be used. Defaults to 'hinge', which gives a linear SVM. The possible options are 'hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron', or a regression loss: 'squared_loss', 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'. The 'log' loss gives logistic regression, a probabilistic classifier. 'modified_huber' is another smooth loss that brings tolerance to outliers as well as probability estimates. 'squared_hinge' is like hinge but is quadratically penalized. 'perceptron' is the linear loss used by the perceptron algorithm. The other losses are designed for regression but can be useful in classification as well; see :class:`~sklearn.linear_model.SGDRegressor` for a description. More details about the losses formulas can be found in the :ref:`User Guide `." - }, - { - "name": "penalty", - "type": "Literal['l2', 'l1', 'elasticnet']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "The penalty (aka regularization term) to be used. Defaults to 'l2' which is the standard regularizer for linear SVM models. 'l1' and 'elasticnet' might bring sparsity to the model (feature selection) not achievable with 'l2'." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the regularization term. The higher the value, the stronger the regularization. Also used to compute the learning rate when set to `learning_rate` is set to 'optimal'." 
- }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1. l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1. Only used if `penalty` is 'elasticnet'." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the intercept should be estimated or not. If False, the data is assumed to be already centered." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the :meth:`partial_fit` method. .. versionadded:: 0.19" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "The stopping criterion. If it is not None, training will stop when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive epochs. .. versionadded:: 0.19" - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not the training data should be shuffled after each epoch." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Epsilon in the epsilon-insensitive loss functions; only if `loss` is 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'. For 'huber', determines the threshold at which it becomes less important to get the prediction exactly right. 
For epsilon-insensitive, any differences between the current prediction and the correct label are ignored if they are less than this threshold." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of CPUs to use to do the OVA (One Versus All, for multi-class problems) computation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for shuffling the data, when ``shuffle`` is set to ``True``. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "learning_rate", - "type": "str", - "hasDefault": true, - "default": "'optimal'", - "limitation": null, - "ignored": false, - "docstring": "The learning rate schedule: - 'constant': `eta = eta0` - 'optimal': `eta = 1.0 / (alpha * (t + t0))` where t0 is chosen by a heuristic proposed by Leon Bottou. - 'invscaling': `eta = eta0 / pow(t, power_t)` - 'adaptive': eta = eta0, as long as the training keeps decreasing. Each time n_iter_no_change consecutive epochs fail to decrease the training loss by tol or fail to increase validation score by tol if early_stopping is True, the current learning rate is divided by 5. .. versionadded:: 0.20 Added 'adaptive' option" - }, - { - "name": "eta0", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The initial learning rate for the 'constant', 'invscaling' or 'adaptive' schedules. The default value is 0.0 as eta0 is not used by the default schedule 'optimal'." 
- }, - { - "name": "power_t", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The exponent for inverse scaling learning rate [default 0.5]." - }, - { - "name": "early_stopping", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use early stopping to terminate training when validation score is not improving. If set to True, it will automatically set aside a stratified fraction of training data as validation and terminate training when validation score returned by the `score` method is not improving by at least tol for n_iter_no_change consecutive epochs. .. versionadded:: 0.20 Added 'early_stopping' option" - }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if `early_stopping` is True. .. versionadded:: 0.20 Added 'validation_fraction' option" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations with no improvement to wait before early stopping. .. versionadded:: 0.20 Added 'n_iter_no_change' option" - }, - { - "name": "class_weight", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Preset for the class_weight fit parameter. Weights associated with classes. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``." 
- }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `. Repeatedly calling fit or partial_fit when warm_start is True can result in a different solution than when calling fit a single time because of the way the data is shuffled. If a dynamic learning rate is used, the learning rate is adapted depending on the number of samples already seen. Calling ``fit`` resets this counter, while ``partial_fit`` will result in increasing the existing counter." - }, - { - "name": "average", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, computes the averaged SGD weights accross all updates and stores the result in the ``coef_`` attribute. If set to an int greater than 1, averaging will begin once the total number of samples seen reaches `average`. So ``average=10`` will begin averaging after seeing 10 samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data for prediction." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Probability estimates.\n\nThis method is only available for log loss and modified Huber loss.\n\nMulticlass probability estimates are derived from binary (one-vs.-rest)\nestimates by simple normalization, as recommended by Zadrozny and\nElkan.\n\nBinary probability estimates for loss=\"modified_huber\" are given by\n(clip(decision_function(X), -1, 1) + 1) / 2. For other loss functions\nit is necessary to perform proper probability calibration by wrapping\nthe classifier with\n:class:`~sklearn.calibration.CalibratedClassifierCV` instead.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data for prediction.\n\nReturns\n-------\nndarray of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in the model,\n where classes are ordered as they are in `self.classes_`.\n\nReferences\n----------\nZadrozny and Elkan, \"Transforming classifier scores into multiclass\nprobability estimates\", SIGKDD'02,\nhttp://www.research.ibm.com/people/z/zadrozny/kdd2002-Transf.pdf\n\nThe justification for the formula in the loss=\"modified_huber\"\ncase is in the appendix B in:\nhttp://jmlr.csail.mit.edu/papers/volume2/zhang02c/zhang02c.pdf" - }, - { - "name": "_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data for prediction." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Log of probability estimates.\n\nThis method is only available for log loss and modified Huber loss.\n\nWhen loss=\"modified_huber\", probability estimates may be hard zeros\nand ones, so taking the logarithm is not possible.\n\nSee ``predict_proba`` for details.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data for prediction.\n\nReturns\n-------\nT : array-like, shape (n_samples, n_classes)\n Returns the log-probability of the sample for each class in the\n model, where classes are ordered as they are in\n `self.classes_`." - }, - { - "name": "_predict_log_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Linear classifiers (SVM, logistic regression, etc.) with SGD training.\n\nThis estimator implements regularized linear models with stochastic\ngradient descent (SGD) learning: the gradient of the loss is estimated\neach sample at a time and the model is updated along the way with a\ndecreasing strength schedule (aka learning rate). SGD allows minibatch\n(online/out-of-core) learning via the `partial_fit` method.\nFor best results using the default learning rate schedule, the data should\nhave zero mean and unit variance.\n\nThis implementation works with data represented as dense or sparse arrays\nof floating point values for the features. The model it fits can be\ncontrolled with the loss parameter; by default, it fits a linear support\nvector machine (SVM).\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using either the squared euclidean norm\nL2 or the absolute norm L1 or a combination of both (Elastic Net). 
If the\nparameter update crosses the 0.0 value because of the regularizer, the\nupdate is truncated to 0.0 to allow for learning sparse models and achieve\nonline feature selection.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nloss : str, default='hinge'\n The loss function to be used. Defaults to 'hinge', which gives a\n linear SVM.\n\n The possible options are 'hinge', 'log', 'modified_huber',\n 'squared_hinge', 'perceptron', or a regression loss: 'squared_loss',\n 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n\n The 'log' loss gives logistic regression, a probabilistic classifier.\n 'modified_huber' is another smooth loss that brings tolerance to\n outliers as well as probability estimates.\n 'squared_hinge' is like hinge but is quadratically penalized.\n 'perceptron' is the linear loss used by the perceptron algorithm.\n The other losses are designed for regression but can be useful in\n classification as well; see\n :class:`~sklearn.linear_model.SGDRegressor` for a description.\n\n More details about the losses formulas can be found in the\n :ref:`User Guide `.\n\npenalty : {'l2', 'l1', 'elasticnet'}, default='l2'\n The penalty (aka regularization term) to be used. Defaults to 'l2'\n which is the standard regularizer for linear SVM models. 'l1' and\n 'elasticnet' might bring sparsity to the model (feature selection)\n not achievable with 'l2'.\n\nalpha : float, default=0.0001\n Constant that multiplies the regularization term. The higher the\n value, the stronger the regularization.\n Also used to compute the learning rate when set to `learning_rate` is\n set to 'optimal'.\n\nl1_ratio : float, default=0.15\n The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\n l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\n Only used if `penalty` is 'elasticnet'.\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. 
If False, the\n data is assumed to be already centered.\n\nmax_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\ntol : float, default=1e-3\n The stopping criterion. If it is not None, training will stop\n when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\n epochs.\n\n .. versionadded:: 0.19\n\nshuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n The verbosity level.\n\nepsilon : float, default=0.1\n Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n For 'huber', determines the threshold at which it becomes less\n important to get the prediction exactly right.\n For epsilon-insensitive, any differences between the current prediction\n and the correct label are ignored if they are less than this threshold.\n\nn_jobs : int, default=None\n The number of CPUs to use to do the OVA (One Versus All, for\n multi-class problems) computation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n Used for shuffling the data, when ``shuffle`` is set to ``True``.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nlearning_rate : str, default='optimal'\n The learning rate schedule:\n\n - 'constant': `eta = eta0`\n - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n where t0 is chosen by a heuristic proposed by Leon Bottou.\n - 'invscaling': `eta = eta0 / pow(t, power_t)`\n - 'adaptive': eta = eta0, as long as the training keeps decreasing.\n Each time n_iter_no_change consecutive epochs fail to decrease the\n training loss by tol or fail to increase validation score by tol if\n early_stopping is True, the current learning rate is divided by 5.\n\n .. versionadded:: 0.20\n Added 'adaptive' option\n\neta0 : double, default=0.0\n The initial learning rate for the 'constant', 'invscaling' or\n 'adaptive' schedules. The default value is 0.0 as eta0 is not used by\n the default schedule 'optimal'.\n\npower_t : double, default=0.5\n The exponent for inverse scaling learning rate [default 0.5].\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to True, it will automatically set aside\n a stratified fraction of training data as validation and terminate\n training when validation score returned by the `score` method is not\n improving by at least tol for n_iter_no_change consecutive epochs.\n\n .. versionadded:: 0.20\n Added 'early_stopping' option\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if `early_stopping` is True.\n\n .. versionadded:: 0.20\n Added 'validation_fraction' option\n\nn_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before early stopping.\n\n .. 
versionadded:: 0.20\n Added 'n_iter_no_change' option\n\nclass_weight : dict, {class_label: weight} or \"balanced\", default=None\n Preset for the class_weight fit parameter.\n\n Weights associated with classes. If not given, all classes\n are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\n Repeatedly calling fit or partial_fit when warm_start is True can\n result in a different solution than when calling fit a single time\n because of the way the data is shuffled.\n If a dynamic learning rate is used, the learning rate is adapted\n depending on the number of samples already seen. Calling ``fit`` resets\n this counter, while ``partial_fit`` will result in increasing the\n existing counter.\n\naverage : bool or int, default=False\n When set to True, computes the averaged SGD weights accross all\n updates and stores the result in the ``coef_`` attribute. If set to\n an int greater than 1, averaging will begin once the total number of\n samples seen reaches `average`. 
So ``average=10`` will begin\n averaging after seeing 10 samples.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features) if n_classes == 2 else (n_classes, n_features)\n Weights assigned to the features.\n\nintercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n Constants in decision function.\n\nn_iter_ : int\n The actual number of iterations before reaching the stopping criterion.\n For multiclass fits, it is the maximum over every binary fit.\n\nloss_function_ : concrete ``LossFunction``\n\nclasses_ : array of shape (n_classes,)\n\nt_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\nSee Also\n--------\nsklearn.svm.LinearSVC : Linear support vector classification.\nLogisticRegression : Logistic regression.\nPerceptron : Inherits from SGDClassifier. ``Perceptron()`` is equivalent to\n ``SGDClassifier(loss=\"perceptron\", eta0=1, learning_rate=\"constant\",\n penalty=None)``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import SGDClassifier\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.pipeline import make_pipeline\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> Y = np.array([1, 1, 2, 2])\n>>> # Always scale the input. The most convenient way is to use a pipeline.\n>>> clf = make_pipeline(StandardScaler(),\n... 
SGDClassifier(max_iter=1000, tol=1e-3))\n>>> clf.fit(X, Y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('sgdclassifier', SGDClassifier())])\n>>> print(clf.predict([[-0.8, -1]]))\n[1]" - }, - { - "name": "BaseSGDRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Subset of training data" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Subset of target values" - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples. If not provided, uniform weights are assumed." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Perform one epoch of stochastic gradient descent on given samples.\n\nInternally, this method uses ``max_iter = 1``. Therefore, it is not\nguaranteed that a minimum of the cost function is reached after calling\nit once. 
Matters such as objective convergence and early stopping\nshould be handled by the user.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Subset of training data\n\ny : numpy array of shape (n_samples,)\n Subset of target values\n\nsample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples.\n If not provided, uniform weights are assumed.\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "coef_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial coefficients to warm-start the optimization." - }, - { - "name": "intercept_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial intercept to warm-start the optimization." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted)." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit linear model with Stochastic Gradient Descent.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Training data\n\ny : ndarray of shape (n_samples,)\n Target values\n\ncoef_init : ndarray of shape (n_features,), default=None\n The initial coefficients to warm-start the optimization.\n\nintercept_init : ndarray of shape (1,), default=None\n The initial intercept to warm-start the optimization.\n\nsample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "_decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the linear model\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n\nReturns\n-------\nndarray of shape (n_samples,)\n Predicted target values per element in X." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the linear model\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n\nReturns\n-------\nndarray of shape (n_samples,)\n Predicted target values per element in X." 
- }, - { - "name": "_fit_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "SGDRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "loss", - "type": "str", - "hasDefault": true, - "default": "'squared_loss'", - "limitation": null, - "ignored": false, - "docstring": "The loss function to be used. The possible values are 'squared_loss', 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive' The 'squared_loss' refers to the ordinary least squares fit. 'huber' modifies 'squared_loss' to focus less on getting outliers correct by switching from squared to linear loss past a distance of epsilon. 'epsilon_insensitive' ignores errors less than epsilon and is linear past that; this is the loss function used in SVR. 'squared_epsilon_insensitive' is the same but becomes squared loss past a tolerance of epsilon. More details about the losses formulas can be found in the :ref:`User Guide `." - }, - { - "name": "penalty", - "type": "Literal['l2', 'l1', 'elasticnet']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "The penalty (aka regularization term) to be used. Defaults to 'l2' which is the standard regularizer for linear SVM models. 'l1' and 'elasticnet' might bring sparsity to the model (feature selection) not achievable with 'l2'." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the regularization term. The higher the value, the stronger the regularization. Also used to compute the learning rate when set to `learning_rate` is set to 'optimal'." 
- }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1. l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1. Only used if `penalty` is 'elasticnet'." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the intercept should be estimated or not. If False, the data is assumed to be already centered." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the :meth:`partial_fit` method. .. versionadded:: 0.19" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "The stopping criterion. If it is not None, training will stop when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive epochs. .. versionadded:: 0.19" - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not the training data should be shuffled after each epoch." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Epsilon in the epsilon-insensitive loss functions; only if `loss` is 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'. For 'huber', determines the threshold at which it becomes less important to get the prediction exactly right. 
For epsilon-insensitive, any differences between the current prediction and the correct label are ignored if they are less than this threshold." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for shuffling the data, when ``shuffle`` is set to ``True``. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "learning_rate", - "type": "str", - "hasDefault": true, - "default": "'invscaling'", - "limitation": null, - "ignored": false, - "docstring": "The learning rate schedule: - 'constant': `eta = eta0` - 'optimal': `eta = 1.0 / (alpha * (t + t0))` where t0 is chosen by a heuristic proposed by Leon Bottou. - 'invscaling': `eta = eta0 / pow(t, power_t)` - 'adaptive': eta = eta0, as long as the training keeps decreasing. Each time n_iter_no_change consecutive epochs fail to decrease the training loss by tol or fail to increase validation score by tol if early_stopping is True, the current learning rate is divided by 5. .. versionadded:: 0.20 Added 'adaptive' option" - }, - { - "name": "eta0", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The initial learning rate for the 'constant', 'invscaling' or 'adaptive' schedules. The default value is 0.01." - }, - { - "name": "power_t", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The exponent for inverse scaling learning rate." - }, - { - "name": "early_stopping", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use early stopping to terminate training when validation score is not improving. 
If set to True, it will automatically set aside a fraction of training data as validation and terminate training when validation score returned by the `score` method is not improving by at least `tol` for `n_iter_no_change` consecutive epochs. .. versionadded:: 0.20 Added 'early_stopping' option" - }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if `early_stopping` is True. .. versionadded:: 0.20 Added 'validation_fraction' option" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations with no improvement to wait before early stopping. .. versionadded:: 0.20 Added 'n_iter_no_change' option" - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `. Repeatedly calling fit or partial_fit when warm_start is True can result in a different solution than when calling fit a single time because of the way the data is shuffled. If a dynamic learning rate is used, the learning rate is adapted depending on the number of samples already seen. Calling ``fit`` resets this counter, while ``partial_fit`` will result in increasing the existing counter." - }, - { - "name": "average", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, computes the averaged SGD weights accross all updates and stores the result in the ``coef_`` attribute. 
If set to an int greater than 1, averaging will begin once the total number of samples seen reaches `average`. So ``average=10`` will begin averaging after seeing 10 samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Linear model fitted by minimizing a regularized empirical loss with SGD\n\nSGD stands for Stochastic Gradient Descent: the gradient of the loss is\nestimated each sample at a time and the model is updated along the way with\na decreasing strength schedule (aka learning rate).\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using either the squared euclidean norm\nL2 or the absolute norm L1 or a combination of both (Elastic Net). If the\nparameter update crosses the 0.0 value because of the regularizer, the\nupdate is truncated to 0.0 to allow for learning sparse models and achieve\nonline feature selection.\n\nThis implementation works with data represented as dense numpy arrays of\nfloating point values for the features.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nloss : str, default='squared_loss'\n The loss function to be used. The possible values are 'squared_loss',\n 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'\n\n The 'squared_loss' refers to the ordinary least squares fit.\n 'huber' modifies 'squared_loss' to focus less on getting outliers\n correct by switching from squared to linear loss past a distance of\n epsilon. 
'epsilon_insensitive' ignores errors less than epsilon and is\n linear past that; this is the loss function used in SVR.\n 'squared_epsilon_insensitive' is the same but becomes squared loss past\n a tolerance of epsilon.\n\n More details about the losses formulas can be found in the\n :ref:`User Guide `.\n\npenalty : {'l2', 'l1', 'elasticnet'}, default='l2'\n The penalty (aka regularization term) to be used. Defaults to 'l2'\n which is the standard regularizer for linear SVM models. 'l1' and\n 'elasticnet' might bring sparsity to the model (feature selection)\n not achievable with 'l2'.\n\nalpha : float, default=0.0001\n Constant that multiplies the regularization term. The higher the\n value, the stronger the regularization.\n Also used to compute the learning rate when set to `learning_rate` is\n set to 'optimal'.\n\nl1_ratio : float, default=0.15\n The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\n l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\n Only used if `penalty` is 'elasticnet'.\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If False, the\n data is assumed to be already centered.\n\nmax_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\ntol : float, default=1e-3\n The stopping criterion. If it is not None, training will stop\n when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\n epochs.\n\n .. 
versionadded:: 0.19\n\nshuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n The verbosity level.\n\nepsilon : float, default=0.1\n Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n For 'huber', determines the threshold at which it becomes less\n important to get the prediction exactly right.\n For epsilon-insensitive, any differences between the current prediction\n and the correct label are ignored if they are less than this threshold.\n\nrandom_state : int, RandomState instance, default=None\n Used for shuffling the data, when ``shuffle`` is set to ``True``.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nlearning_rate : string, default='invscaling'\n The learning rate schedule:\n\n - 'constant': `eta = eta0`\n - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n where t0 is chosen by a heuristic proposed by Leon Bottou.\n - 'invscaling': `eta = eta0 / pow(t, power_t)`\n - 'adaptive': eta = eta0, as long as the training keeps decreasing.\n Each time n_iter_no_change consecutive epochs fail to decrease the\n training loss by tol or fail to increase validation score by tol if\n early_stopping is True, the current learning rate is divided by 5.\n\n .. versionadded:: 0.20\n Added 'adaptive' option\n\neta0 : double, default=0.01\n The initial learning rate for the 'constant', 'invscaling' or\n 'adaptive' schedules. The default value is 0.01.\n\npower_t : double, default=0.25\n The exponent for inverse scaling learning rate.\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. 
If set to True, it will automatically set aside\n a fraction of training data as validation and terminate\n training when validation score returned by the `score` method is not\n improving by at least `tol` for `n_iter_no_change` consecutive\n epochs.\n\n .. versionadded:: 0.20\n Added 'early_stopping' option\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if `early_stopping` is True.\n\n .. versionadded:: 0.20\n Added 'validation_fraction' option\n\nn_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before early stopping.\n\n .. versionadded:: 0.20\n Added 'n_iter_no_change' option\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\n Repeatedly calling fit or partial_fit when warm_start is True can\n result in a different solution than when calling fit a single time\n because of the way the data is shuffled.\n If a dynamic learning rate is used, the learning rate is adapted\n depending on the number of samples already seen. Calling ``fit`` resets\n this counter, while ``partial_fit`` will result in increasing the\n existing counter.\n\naverage : bool or int, default=False\n When set to True, computes the averaged SGD weights accross all\n updates and stores the result in the ``coef_`` attribute. If set to\n an int greater than 1, averaging will begin once the total number of\n samples seen reaches `average`. So ``average=10`` will begin\n averaging after seeing 10 samples.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,)\n Weights assigned to the features.\n\nintercept_ : ndarray of shape (1,)\n The intercept term.\n\naverage_coef_ : ndarray of shape (n_features,)\n Averaged weights assigned to the features. 
Only available\n if ``average=True``.\n\n .. deprecated:: 0.23\n Attribute ``average_coef_`` was deprecated\n in version 0.23 and will be removed in 1.0 (renaming of 0.25).\n\naverage_intercept_ : ndarray of shape (1,)\n The averaged intercept term. Only available if ``average=True``.\n\n .. deprecated:: 0.23\n Attribute ``average_intercept_`` was deprecated\n in version 0.23 and will be removed in 1.0 (renaming of 0.25).\n\nn_iter_ : int\n The actual number of iterations before reaching the stopping criterion.\n\nt_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import SGDRegressor\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> # Always scale the input. The most convenient way is to use a pipeline.\n>>> reg = make_pipeline(StandardScaler(),\n... SGDRegressor(max_iter=1000, tol=1e-3))\n>>> reg.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('sgdregressor', SGDRegressor())])\n\nSee Also\n--------\nRidge, ElasticNet, Lasso, sklearn.svm.SVR" - } - ], - "functions": [ - { - "name": "_prepare_fit_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialization for fit_binary.\n\nReturns y, coef, intercept, average_coef, average_intercept." 
- }, - { - "name": "fit_binary", - "decorators": [], - "parameters": [ - { - "name": "est", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator to fit" - }, - { - "name": "i", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the positive class" - }, - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "alpha", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The regularization parameter" - }, - { - "name": "C", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum step size for passive aggressive" - }, - { - "name": "learning_rate", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The learning rate. Accepted values are 'constant', 'optimal', 'invscaling', 'pa1' and 'pa2'." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations (epochs)" - }, - { - "name": "pos_weight", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weight of the positive class" - }, - { - "name": "neg_weight", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weight of the negative class" - }, - { - "name": "sample_weight", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weight of each sample" - }, - { - "name": "validation_mask", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed validation mask in case _fit_binary is called in the context of a one-vs-rest reduction." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit a single binary classifier.\n\nThe i'th class is considered the \"positive\" class.\n\nParameters\n----------\nest : Estimator object\n The estimator to fit\n\ni : int\n Index of the positive class\n\nX : numpy array or sparse matrix of shape [n_samples,n_features]\n Training data\n\ny : numpy array of shape [n_samples, ]\n Target values\n\nalpha : float\n The regularization parameter\n\nC : float\n Maximum step size for passive aggressive\n\nlearning_rate : string\n The learning rate. 
Accepted values are 'constant', 'optimal',\n 'invscaling', 'pa1' and 'pa2'.\n\nmax_iter : int\n The maximum number of iterations (epochs)\n\npos_weight : float\n The weight of the positive class\n\nneg_weight : float\n The weight of the negative class\n\nsample_weight : numpy array of shape [n_samples, ]\n The weight of each sample\n\nvalidation_mask : numpy array of shape [n_samples, ], default=None\n Precomputed validation mask in case _fit_binary is called in the\n context of a one-vs-rest reduction.\n\nrandom_state : int, RandomState instance, default=None\n If int, random_state is the seed used by the random number generator;\n If RandomState instance, random_state is the random number generator;\n If None, the random number generator is the RandomState instance used\n by `np.random`." - } - ] - }, - { - "name": "sklearn.linear_model._theil_sen", - "imports": [ - "import warnings", - "from itertools import combinations", - "import numpy as np", - "from scipy import linalg", - "from scipy.special import binom", - "from scipy.linalg.lapack import get_lapack_funcs", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "from _base import LinearModel", - "from base import RegressorMixin", - "from utils import check_random_state", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from exceptions import ConvergenceWarning" - ], - "classes": [ - { - "name": "TheilSenRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations." 
- }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." - }, - { - "name": "max_subpopulation", - "type": "int", - "hasDefault": true, - "default": "1e4", - "limitation": null, - "ignored": false, - "docstring": "Instead of computing with a set of cardinality 'n choose k', where n is the number of samples and k is the number of subsamples (at least number of features), consider only a stochastic subpopulation of a given maximal size if 'n choose k' is larger than max_subpopulation. For other than small problem sizes this parameter will determine memory usage and runtime if n_subsamples is not changed." - }, - { - "name": "n_subsamples", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to calculate the parameters. This is at least the number of features (plus 1 if fit_intercept=True) and the number of samples as a maximum. A lower number leads to a higher breakdown point and a low efficiency while a high number leads to a low breakdown point and a high efficiency. If None, take the minimum number of subsamples leading to maximal robustness. If n_subsamples is set to n_samples, Theil-Sen is identical to least squares." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for the calculation of spatial median." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Tolerance when calculating spatial median." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A random number generator instance to define the state of the random permutations generator. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `" - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPUs to use during the cross validation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbose mode when fitting the model." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_subparams", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit linear model.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Training data.\ny : ndarray of shape (n_samples,)\n Target values.\n\nReturns\n-------\nself : returns an instance of self." - } - ], - "docstring": "Theil-Sen Estimator: robust multivariate regression model.\n\nThe algorithm calculates least square solutions on subsets with size\nn_subsamples of the samples in X. 
Any value of n_subsamples between the\nnumber of features and samples leads to an estimator with a compromise\nbetween robustness and efficiency. Since the number of least square\nsolutions is \"n_samples choose n_subsamples\", it can be extremely large\nand can therefore be limited with max_subpopulation. If this limit is\nreached, the subsets are chosen randomly. In a final step, the spatial\nmedian (or L1 median) is calculated of all least square solutions.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nmax_subpopulation : int, default=1e4\n Instead of computing with a set of cardinality 'n choose k', where n is\n the number of samples and k is the number of subsamples (at least\n number of features), consider only a stochastic subpopulation of a\n given maximal size if 'n choose k' is larger than max_subpopulation.\n For other than small problem sizes this parameter will determine\n memory usage and runtime if n_subsamples is not changed.\n\nn_subsamples : int, default=None\n Number of samples to calculate the parameters. This is at least the\n number of features (plus 1 if fit_intercept=True) and the number of\n samples as a maximum. A lower number leads to a higher breakdown\n point and a low efficiency while a high number leads to a low\n breakdown point and a high efficiency. 
If None, take the\n minimum number of subsamples leading to maximal robustness.\n If n_subsamples is set to n_samples, Theil-Sen is identical to least\n squares.\n\nmax_iter : int, default=300\n Maximum number of iterations for the calculation of spatial median.\n\ntol : float, default=1.e-3\n Tolerance when calculating spatial median.\n\nrandom_state : int, RandomState instance or None, default=None\n A random number generator instance to define the state of the random\n permutations generator. Pass an int for reproducible output across\n multiple function calls.\n See :term:`Glossary `\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool, default=False\n Verbose mode when fitting the model.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,)\n Coefficients of the regression model (median of distribution).\n\nintercept_ : float\n Estimated intercept of regression model.\n\nbreakdown_ : float\n Approximated breakdown point.\n\nn_iter_ : int\n Number of iterations needed for the spatial median.\n\nn_subpopulation_ : int\n Number of combinations taken into account from 'n choose k', where n is\n the number of samples and k is the number of subsamples.\n\nExamples\n--------\n>>> from sklearn.linear_model import TheilSenRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(\n... 
n_samples=200, n_features=2, noise=4.0, random_state=0)\n>>> reg = TheilSenRegressor(random_state=0).fit(X, y)\n>>> reg.score(X, y)\n0.9884...\n>>> reg.predict(X[:1,])\narray([-31.5871...])\n\nReferences\n----------\n- Theil-Sen Estimators in a Multiple Linear Regression Model, 2009\n Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang\n http://home.olemiss.edu/~xdang/papers/MTSE.pdf" - } - ], - "functions": [ - { - "name": "_modified_weiszfeld_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "x_old", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Current start vector." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Modified Weiszfeld step.\n\nThis function defines one iteration step in order to approximate the\nspatial median (L1 median). It is a form of an iteratively re-weighted\nleast squares method.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\nx_old : ndarray of shape = (n_features,)\n Current start vector.\n\nReturns\n-------\nx_new : ndarray of shape (n_features,)\n New iteration step.\n\nReferences\n----------\n- On Computation of Spatial Median for Robust Data Mining, 2005\n T. K\u00e4rkk\u00e4inen and S. 
\u00c4yr\u00e4m\u00f6\n http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf" - }, - { - "name": "_spatial_median", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Stop the algorithm if spatial_median has converged." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Spatial median (L1 median).\n\nThe spatial median is member of a class of so-called M-estimators which\nare defined by an optimization problem. Given a number of p points in an\nn-dimensional space, the point x minimizing the sum of all distances to the\np other points is called spatial median.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\nmax_iter : int, default=300\n Maximum number of iterations.\n\ntol : float, default=1.e-3\n Stop the algorithm if spatial_median has converged.\n\nReturns\n-------\nspatial_median : ndarray of shape = (n_features,)\n Spatial median.\n\nn_iter : int\n Number of iterations needed.\n\nReferences\n----------\n- On Computation of Spatial Median for Robust Data Mining, 2005\n T. K\u00e4rkk\u00e4inen and S. 
\u00c4yr\u00e4m\u00f6\n http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf" - }, - { - "name": "_breakdown_point", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples." - }, - { - "name": "n_subsamples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of subsamples to consider." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Approximation of the breakdown point.\n\nParameters\n----------\nn_samples : int\n Number of samples.\n\nn_subsamples : int\n Number of subsamples to consider.\n\nReturns\n-------\nbreakdown_point : float\n Approximation of breakdown point." - }, - { - "name": "_lstsq", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Design matrix, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector, where n_samples is the number of samples." - }, - { - "name": "indices", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of all subsamples with respect to the chosen subpopulation." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fit intercept or not." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Least Squares Estimator for TheilSenRegressor class.\n\nThis function calculates the least squares method on a subset of rows of X\nand y defined by the indices array. 
Optionally, an intercept column is\nadded if intercept is set to true.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Design matrix, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : ndarray of shape (n_samples,)\n Target vector, where n_samples is the number of samples.\n\nindices : ndarray of shape (n_subpopulation, n_subsamples)\n Indices of all subsamples with respect to the chosen subpopulation.\n\nfit_intercept : bool\n Fit intercept or not.\n\nReturns\n-------\nweights : ndarray of shape (n_subpopulation, n_features + intercept)\n Solution matrix of n_subpopulation solved least square problems." - } - ] - }, - { - "name": "sklearn.linear_model", - "imports": [ - "from _base import LinearRegression", - "from _bayes import BayesianRidge", - "from _bayes import ARDRegression", - "from _least_angle import Lars", - "from _least_angle import LassoLars", - "from _least_angle import lars_path", - "from _least_angle import lars_path_gram", - "from _least_angle import LarsCV", - "from _least_angle import LassoLarsCV", - "from _least_angle import LassoLarsIC", - "from _coordinate_descent import Lasso", - "from _coordinate_descent import ElasticNet", - "from _coordinate_descent import LassoCV", - "from _coordinate_descent import ElasticNetCV", - "from _coordinate_descent import lasso_path", - "from _coordinate_descent import enet_path", - "from _coordinate_descent import MultiTaskLasso", - "from _coordinate_descent import MultiTaskElasticNet", - "from _coordinate_descent import MultiTaskElasticNetCV", - "from _coordinate_descent import MultiTaskLassoCV", - "from _glm import PoissonRegressor", - "from _glm import GammaRegressor", - "from _glm import TweedieRegressor", - "from _huber import HuberRegressor", - "from _sgd_fast import Hinge", - "from _sgd_fast import Log", - "from _sgd_fast import ModifiedHuber", - "from _sgd_fast import SquaredLoss", - "from _sgd_fast import Huber", - "from 
_stochastic_gradient import SGDClassifier", - "from _stochastic_gradient import SGDRegressor", - "from _ridge import Ridge", - "from _ridge import RidgeCV", - "from _ridge import RidgeClassifier", - "from _ridge import RidgeClassifierCV", - "from _ridge import ridge_regression", - "from _logistic import LogisticRegression", - "from _logistic import LogisticRegressionCV", - "from _omp import orthogonal_mp", - "from _omp import orthogonal_mp_gram", - "from _omp import OrthogonalMatchingPursuit", - "from _omp import OrthogonalMatchingPursuitCV", - "from _passive_aggressive import PassiveAggressiveClassifier", - "from _passive_aggressive import PassiveAggressiveRegressor", - "from _perceptron import Perceptron", - "from _ransac import RANSACRegressor", - "from _theil_sen import TheilSenRegressor" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.linear_model.tests.test_base", - "imports": [ - "import pytest", - "import numpy as np", - "from scipy import sparse", - "from scipy import linalg", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils import check_random_state", - "from sklearn.utils.fixes import parse_version", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model._base import _preprocess_data", - "from sklearn.linear_model._base import _rescale_data", - "from sklearn.linear_model._base import make_dataset", - "from sklearn.datasets import make_sparse_uncorrelated", - "from sklearn.datasets import make_regression", - "from sklearn.datasets import load_iris" - ], - "classes": [], - "functions": [ - { - "name": "test_linear_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_sample_weights", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises_value_error_if_positive_and_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises_value_error_if_sample_weights_greater_than_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_sparse_equal_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_multiple_outcome", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_sparse_multiple_outcome", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_positive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_positive_multiple_outcome", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_positive_vs_nonpositive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_positive_vs_nonpositive_when_positive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", 
- "docstring": null - }, - { - "name": "test_linear_regression_pd_sparse_dataframe_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_preprocess_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_preprocess_data_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_preprocess_data_weighted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_preprocess_data_with_return_mean", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_csr_preprocess_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_preprocess_copy_data_no_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dtype_preprocess_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rescale_data_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fused_types_make_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_bayes", - "imports": [ - "from math import log", - "import numpy as np", - "from scipy.linalg import pinvh", - "import pytest", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing 
import assert_array_less", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils import check_random_state", - "from sklearn.linear_model import BayesianRidge", - "from sklearn.linear_model import ARDRegression", - "from sklearn.linear_model import Ridge", - "from sklearn import datasets", - "from sklearn.utils.extmath import fast_logdet" - ], - "classes": [], - "functions": [ - { - "name": "test_n_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check value of n_iter." - }, - { - "name": "test_bayesian_ridge_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check scores attribute shape" - }, - { - "name": "test_bayesian_ridge_score_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check value of score on toy example.\n\nCompute log marginal likelihood with equation (36) in Sparse Bayesian\nLearning and the Relevance Vector Machine (Tipping, 2001):\n\n- 0.5 * (log |Id/alpha + X.X^T/lambda| +\n y^T.(Id/alpha + X.X^T/lambda).y + n * log(2 * pi))\n+ lambda_1 * log(lambda) - lambda_2 * lambda\n+ alpha_1 * log(alpha) - alpha_2 * alpha\n\nand check equality with the score computed during training." 
- }, - { - "name": "test_bayesian_ridge_parameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_toy_bayesian_ridge_object", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_initial_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prediction_bayesian_ridge_ard_with_constant_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_std_bayesian_ridge_ard_with_constant_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_update_of_sigma_in_ard", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_toy_ard_object", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ard_accuracy_on_easy_problem", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_return_std", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_update_sigma", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ard_regression_predict_normalize_true", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that we can predict with `normalize=True` 
and `return_std=True`.\nNon-regression test for:\nhttps://github.com/scikit-learn/scikit-learn/issues/18605" - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_coordinate_descent", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy import interpolate", - "from scipy import sparse", - "from copy import deepcopy", - "import joblib", - "from sklearn.base import is_classifier", - "from sklearn.datasets import load_diabetes", - "from sklearn.datasets import make_regression", - "from sklearn.model_selection import train_test_split", - "from sklearn.pipeline import make_pipeline", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import TempMemmap", - "from sklearn.utils.fixes import parse_version", - "from sklearn.linear_model import ARDRegression", - "from sklearn.linear_model import BayesianRidge", - "from sklearn.linear_model import ElasticNet", - "from sklearn.linear_model import ElasticNetCV", - "from sklearn.linear_model import enet_path", - "from sklearn.linear_model import Lars", - "from sklearn.linear_model import lars_path", - "from sklearn.linear_model import Lasso", - "from sklearn.linear_model import LassoCV", - "from sklearn.linear_model import LassoLars", - "from sklearn.linear_model import LassoLarsCV", - "from sklearn.linear_model import LassoLarsIC", - "from 
sklearn.linear_model import lasso_path", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import MultiTaskElasticNet", - "from sklearn.linear_model import MultiTaskElasticNetCV", - "from sklearn.linear_model import MultiTaskLasso", - "from sklearn.linear_model import MultiTaskLassoCV", - "from sklearn.linear_model import OrthogonalMatchingPursuit", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model import RidgeClassifier", - "from sklearn.linear_model import RidgeCV", - "from sklearn.linear_model._coordinate_descent import _set_order", - "from sklearn.utils import check_array", - "from sklearn.model_selection import ShuffleSplit", - "from sklearn import datasets" - ], - "classes": [], - "functions": [ - { - "name": "test_l1_ratio_param_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_order_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that _set_order returns arrays with promised order." - }, - { - "name": "test_set_order_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that _set_order returns sparse matrices in promised format." 
- }, - { - "name": "test_lasso_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "build_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "build an ill-posed linear regression problem with many noisy features and\ncomparatively few samples" - }, - { - "name": "test_lasso_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_cv_with_some_model_selection", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_cv_positive_constraint", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_model_pipeline_same_as_normalize_true", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_model_pipeline_same_dense_and_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_path_return_models_vs_new_return_gives_same_coefficients", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_path_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_alpha_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_positive_constraint", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_positive_constraint", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_cv_positive_constraint", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_uniform_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_task_lasso_and_enet", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_readonly_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_task_lasso_readonly_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_multitarget", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multioutput_enetcv_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multitask_enet_and_lasso_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_1d_multioutput_enet_and_multitask_enet_cv", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_1d_multioutput_lasso_and_multitask_lasso_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_input_dtype_enet_and_lassocv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precompute_invalid_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_convergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_convergence_with_regularizer_decrement", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_descent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_path_positive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_dense_descent_paths", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_input_false", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_copy_X_True", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_copy_X_False_check_input_False", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_overrided_gram_matrix", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_non_float_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_float_precision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_l1_ratio", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_coef_shape_not_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_multitask_lasso", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_coordinate_descent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that a warning is issued if model does not converge" - }, - { - "name": "test_convergence_warnings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_input_convergence_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lassoCV_does_not_set_precompute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_task_lasso_cv_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_sample_weight_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that the impact of sample_weight is consistent." 
- }, - { - "name": "test_enet_sample_weight_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_models_cv_fit_for_all_backends", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_huber", - "imports": [ - "import numpy as np", - "from scipy import optimize", - "from scipy import sparse", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.datasets import make_regression", - "from sklearn.linear_model import HuberRegressor", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import SGDRegressor", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model._huber import _huber_loss_and_gradient" - ], - "classes": [], - "functions": [ - { - "name": "make_regression_with_outliers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_equals_lr_for_high_epsilon", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_max_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_gradient", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "test_huber_scaling_invariant", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_and_sgd_same_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_better_r2_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_bool", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_least_angle", - "imports": [ - "import warnings", - "import numpy as np", - "import pytest", - "from scipy import linalg", - "from sklearn.base import clone", - "from sklearn.model_selection import train_test_split", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import TempMemmap", - "from sklearn.utils.fixes import np_version", - "from sklearn.utils.fixes import parse_version", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn import linear_model", - "from sklearn import datasets", - "from sklearn.linear_model._least_angle import _lars_path_residues", - "from sklearn.linear_model import LassoLarsIC", - "from sklearn.linear_model import lars_path", - "from sklearn.linear_model import Lars", - "from sklearn.linear_model import LassoLars", - "from io import StringIO", - "import sys" - ], - "classes": [], - "functions": [ - { - "name": "test_simple", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_simple_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_assert_same_lars_path_result", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_path_gram_equivalent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_x_none_gram_none_raises_value_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_all_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_lstsq", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_gives_lstsq_solution", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_collinearity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_path_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_path_all_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_precompute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_singular_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rank_deficient_design", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_vs_lasso_cd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_vs_lasso_cd_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_path_length", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_vs_lasso_cd_ill_conditioned", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_vs_lasso_cd_ill_conditioned2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_add_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_n_nonzero_coefs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multitarget", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_cv_max_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_ic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_path_readonly_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_path_positive_constraint", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimatorclasses_positive_constraint", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_vs_lasso_cd_positive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_vs_R_implementation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_copyX_behaviour", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that user input regarding copy_X is not being overridden (it was until\nat least version 0.21)" - }, - { - "name": "test_lasso_lars_fit_copyX_behaviour", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that user input to .fit for copy_X overrides default __init__ value" - }, - { - "name": "test_lars_with_jitter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_X_none_gram_not_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_copy_X_with_auto_gram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_logistic", - "imports": [ - "import os", - "import sys", - "import warnings", - "import numpy as 
np", - "from numpy.testing import assert_allclose", - "from numpy.testing import assert_almost_equal", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal", - "import scipy.sparse as sp", - "from scipy import linalg", - "from scipy import optimize", - "from scipy import sparse", - "import pytest", - "from sklearn.base import clone", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_classification", - "from sklearn.metrics import log_loss", - "from sklearn.metrics import get_scorer", - "from sklearn.model_selection import StratifiedKFold", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.model_selection import train_test_split", - "from sklearn.model_selection import cross_val_score", - "from sklearn.preprocessing import LabelEncoder", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.utils import compute_class_weight", - "from sklearn.utils import _IS_32BIT", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils import shuffle", - "from sklearn.linear_model import SGDClassifier", - "from sklearn.preprocessing import scale", - "from sklearn.utils._testing import skip_if_no_parallel", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.linear_model._logistic import LogisticRegression", - "from sklearn.linear_model._logistic import _logistic_regression_path", - "from sklearn.linear_model._logistic import LogisticRegressionCV", - "from sklearn.linear_model._logistic import _logistic_loss_and_grad", - "from sklearn.linear_model._logistic import _logistic_grad_hess", - "from sklearn.linear_model._logistic import _multinomial_grad_hess", - "from sklearn.linear_model._logistic import 
_logistic_loss", - "from sklearn.linear_model._logistic import _log_reg_scoring_path" - ], - "classes": [], - "functions": [ - { - "name": "check_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that the model is able to fit the classification data" - }, - { - "name": "test_predict_2_classes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_cv_mock_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_cv_score_does_not_warn_by_default", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lr_liblinear_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_3_classes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_solver_option", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_binary_probabilities", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparsify", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inconsistent_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_write_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_consistency_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regression_path_convergence_fail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_liblinear_dual_random_state", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_loss_and_grad", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_grad_hess", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_cv_multinomial_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_logistic_regression_string_inputs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_logistic_cv_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_intercept_logistic_helper", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_multinomial_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regression_solvers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regression_solvers_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regressioncv_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regression_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_compute_class_weight_dictionary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regression_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regression_multinomial", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_grad_hess", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_liblinear_decision_function_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_liblinear_logregcv_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_saga_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logreg_intercept_scaling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logreg_intercept_scaling_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logreg_l1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logreg_l1_sparse_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regression_cv_refit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logreg_predict_proba_multinomial", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_saga_vs_liblinear", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dtype_match", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - 
{ - "name": "test_warm_start_converge_LR", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_elastic_net_coeffs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_elastic_net_l1_l2_equivalence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_elastic_net_vs_l1_l2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_LogisticRegression_elastic_net_objective", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_LogisticRegressionCV_GridSearchCV_elastic_net", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_LogisticRegressionCV_GridSearchCV_elastic_net_ovr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_LogisticRegressionCV_no_refit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_LogisticRegressionCV_elasticnet_attribute_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_l1_ratio_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_l1_ratios_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_elastic_net_versus_sgd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "test_logistic_regression_path_coefs_multinomial", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regression_multi_class_auto", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_penalty_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logisticregression_liblinear_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scores_attribute_layout_elasticnet", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_identifiability_on_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that the multinomial classification is identifiable.\n\nA multinomial with c classes can be modeled with\nprobability_k = exp(X@coef_k) / sum(exp(X@coef_l), l=1..c) for k=1..c.\nThis is not identifiable, unless one chooses a further constraint.\nAccording to [1], the maximum of the L2 penalized likelihood automatically\nsatisfies the symmetric constraint:\nsum(coef_k, k=1..c) = 0\n\nFurther details can be found in the appendix of [2].\n\nReference\n---------\n.. [1] Zhu, Ji and Trevor J. Hastie. \"Classification of gene microarrays by\npenalized logistic regression\". Biostatistics 5 3 (2004): 427-43.\nhttps://doi.org/10.1093/biostatistics%2Fkxg046\n\n.. [2] Powers, Scott, Trevor J. Hastie and Robert Tibshirani. 
\"Nuclear\npenalized multinomial regression with an application to predicting at bat\noutcomes in baseball.\" Statistical modelling 18 5-6 (2017): 388-410 .\nhttps://arxiv.org/pdf/1706.10272.pdf" - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_omp", - "imports": [ - "import numpy as np", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.linear_model import orthogonal_mp", - "from sklearn.linear_model import orthogonal_mp_gram", - "from sklearn.linear_model import OrthogonalMatchingPursuit", - "from sklearn.linear_model import OrthogonalMatchingPursuitCV", - "from sklearn.linear_model import LinearRegression", - "from sklearn.utils import check_random_state", - "from sklearn.datasets import make_sparse_coded_signal" - ], - "classes": [], - "functions": [ - { - "name": "test_correct_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_correct_shapes_gram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_nonzero_coefs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tol", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_with_without_gram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_with_without_gram_tol", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unreachable_accuracy", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bad_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_perfect_signal_recovery", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_orthogonal_mp_gram_readonly", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_identical_regressors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_swapped_regressors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_atoms", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_omp_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_omp_return_path_prop_with_gram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_omp_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_omp_reaches_least_squares", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_passive_aggressive", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "import pytest", - "from sklearn.utils._testing import 
assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.base import ClassifierMixin", - "from sklearn.utils import check_random_state", - "from sklearn.datasets import load_iris", - "from sklearn.linear_model import PassiveAggressiveClassifier", - "from sklearn.linear_model import PassiveAggressiveRegressor" - ], - "classes": [ - { - "name": "MyPassiveAggressive", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "project", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_classifier_accuracy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_refit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_correctness", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_undefined_methods", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "test_partial_fit_weight_class_balanced", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_equal_class_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_class_weight_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_class_weight_format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_mse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_correctness", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_undefined_methods", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_passive_aggressive_deprecated_attr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_perceptron", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils import check_random_state", - "from sklearn.datasets import load_iris", - "from sklearn.linear_model import Perceptron" - ], - "classes": [ - { - "name": "MyPerceptron", - "decorators": [], - "superclasses": [], - 
"methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "project", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_perceptron_accuracy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_perceptron_correctness", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_undefined_methods", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_perceptron_l1_ratio", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that `l1_ratio` has an impact when `penalty='elasticnet'`" - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_ransac", - "imports": [ - "import numpy as np", - "from scipy import sparse", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_raises_regexp", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.datasets import make_regression", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import RANSACRegressor", - "from 
sklearn.linear_model import OrthogonalMatchingPursuit", - "from sklearn.linear_model._ransac import _dynamic_max_trials", - "from sklearn.exceptions import ConvergenceWarning" - ], - "classes": [], - "functions": [ - { - "name": "test_ransac_inliers_outliers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_is_data_valid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_is_model_valid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_max_trials", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_stop_n_inliers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_stop_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_resid_thresh_no_inliers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_no_valid_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_no_valid_model", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_exceed_max_skips", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_warn_exceed_max_skips", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_sparse_coo", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_sparse_csr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_sparse_csc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_none_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_min_n_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_multi_dimensional_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_residual_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_default_residual_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_dynamic_max_trials", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_fit_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_final_model_fit_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_ridge", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "from scipy import linalg", - "from itertools import product", - "import pytest", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_warns", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn import datasets", - "from sklearn.metrics import mean_squared_error", - "from sklearn.metrics import make_scorer", - "from sklearn.metrics import get_scorer", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import ridge_regression", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model._ridge import _RidgeGCV", - "from sklearn.linear_model import RidgeCV", - "from sklearn.linear_model import RidgeClassifier", - "from sklearn.linear_model import RidgeClassifierCV", - "from sklearn.linear_model._ridge import _solve_cholesky", - "from sklearn.linear_model._ridge import _solve_cholesky_kernel", - "from sklearn.linear_model._ridge import _check_gcv_mode", - "from sklearn.linear_model._ridge import _X_CenterStackOp", - "from sklearn.datasets import make_regression", - "from sklearn.datasets import make_classification", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.model_selection import KFold", - "from sklearn.model_selection import GroupKFold", - "from sklearn.model_selection import cross_val_predict", - "from sklearn.model_selection import LeaveOneOut", - "from 
sklearn.utils import check_random_state", - "from sklearn.datasets import make_multilabel_classification" - ], - "classes": [], - "functions": [ - { - "name": "_accuracy_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_mean_squared_error_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_primal_dual_relationship", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_singular", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_regression_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_regression_convergence_fail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_toy_ridge_object", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_vs_lstsq", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_ridge_individual_penalties", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_X_CenterStackOp", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_gram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_sparse_offset_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_solver_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_gcv_vs_ridge_loo_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_loo_cv_asym_scoring", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_gcv_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_gcv_mode_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_gcv_mode_choice", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_ridge_loo", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_ridge_cv_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_ridge_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_gcv_cv_values_not_stored", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_best_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_cv_individual_penalties", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_ridge_diabetes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_multi_ridge_diabetes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_ridge_classifiers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_classifier_with_scoring", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_regression_custom_scoring", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_tolerance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_dense_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dense_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_sparse_svd", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_weight_vs_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check class_weights resemble sample_weights behavior." - }, - { - "name": "test_class_weights_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridgecv_store_cv_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_classifier_cv_store_cv_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridgecv_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises_value_error_if_sample_weights_greater_than_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_design_with_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridgecv_int_alphas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridgecv_negative_alphas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises_value_error_if_solver_not_supported", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_cg_max_iter", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_fit_intercept_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_fit_intercept_sparse_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_fit_intercept_sparse_sag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_regression_check_arguments_validity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "check if all combinations of arguments give valid estimations" - }, - { - "name": "test_ridge_classifier_no_support_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dtype_match", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dtype_match_cholesky", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_regression_dtype_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_sag_with_X_fortran", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_sag", - "imports": [ - "import math", - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "from scipy.special import 
logsumexp", - "from sklearn.linear_model._sag import get_auto_step_size", - "from sklearn.linear_model._sag_fast import _multinomial_grad_loss_all_samples", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model._base import make_dataset", - "from sklearn.linear_model._logistic import _multinomial_loss_grad", - "from sklearn.utils.extmath import row_norms", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils import compute_class_weight", - "from sklearn.utils import check_random_state", - "from sklearn.preprocessing import LabelEncoder", - "from sklearn.preprocessing import LabelBinarizer", - "from sklearn.datasets import make_blobs", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_classification", - "from sklearn.base import clone" - ], - "classes": [], - "functions": [ - { - "name": "log_dloss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "log_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "squared_dloss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "squared_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_pobj", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "sag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "sag_sparse", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_step_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_matching", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_matching", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sag_pobj_matches_logistic_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests if the sag pobj matches log reg" - }, - { - "name": "test_sag_pobj_matches_ridge_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests if the sag pobj matches ridge reg" - }, - { - "name": "test_sag_regressor_computed_correctly", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests if the sag regressor is computed correctly" - }, - { - "name": "test_get_auto_step_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sag_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests if the sag regressor performs well" - }, - { - "name": "test_sag_classifier_computed_correctly", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests if the binary classifier is computed correctly" - }, - { - "name": "test_sag_multiclass_computed_correctly", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests if the multiclass classifier is computed correctly" - }, - { - "name": 
"test_classifier_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests if classifier results match target" - }, - { - "name": "test_binary_classifier_class_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests binary classifier with classweights for each class" - }, - { - "name": "test_multiclass_classifier_class_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests multiclass with classweights for each class" - }, - { - "name": "test_classifier_single_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests if ValueError is thrown with only one class" - }, - { - "name": "test_step_size_alpha_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_loss_ground_truth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sag_classifier_raises_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_sgd", - "imports": [ - "import pickle", - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "import joblib", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regexp", - "from sklearn.utils._testing import 
assert_warns", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils.fixes import parse_version", - "from sklearn import linear_model", - "from sklearn import datasets", - "from sklearn import metrics", - "from sklearn.base import clone", - "from sklearn.base import is_classifier", - "from sklearn.preprocessing import LabelEncoder", - "from sklearn.preprocessing import scale", - "from sklearn.preprocessing import MinMaxScaler", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.model_selection import StratifiedShuffleSplit", - "from sklearn.model_selection import ShuffleSplit", - "from sklearn.linear_model import _sgd_fast as sgd_fast", - "from sklearn.model_selection import RandomizedSearchCV" - ], - "classes": [ - { - "name": "_SparseSGDClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "_SparseSGDRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "_update_kwargs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "SGDClassifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "SGDRegressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "SparseSGDClassifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "SparseSGDRegressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "asgd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_bad_alpha", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_bad_penalty", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_bad_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_input_format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plain_has_no_average_attr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_deprecated_attr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_late_onset_averaging_not_reached", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_late_onset_averaging_reached", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_bad_alpha_for_optimal_learning_rate", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_adaptive_longer_than_constant", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_validation_set_not_used_for_training", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_iter_no_change", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_not_enough_sample_for_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_clf", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_bad_l1_ratio", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_bad_learning_rate_schedule", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_bad_eta0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_max_iter_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_shuffle_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_early_stopping_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_validation_fraction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_n_iter_no_change", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_argument_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_provide_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_early_stopping_with_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_intercept_binary", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_average_binary_computed_correctly", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_intercept_to_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_at_least_two_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_weight_class_balanced", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_multiclass_average", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_multiclass_with_init_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_multiclass_njobs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_coef_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_predict_proba_method_access", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_l1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_equal_class_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_class_weight_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_class_weight_format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weights_multiplied", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_balanced_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_multiclass_average", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_then_partial_fit", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_equal_fit_classif", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_losses", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiple_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_reg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_averaged_computed_correctly", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_averaged_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_average_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_least_squares_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_epsilon_insensitive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_huber_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_elasticnet_convergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_equal_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_loss_function_epsilon", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_l1_ratio", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_underflow_or_overlow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_numerical_stability_large_gradient", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_large_regularization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tol_parameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_gradient_common", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_hinge", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_squared_hinge", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_log", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_squared_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_gradient_huber", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_modified_huber", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_epsilon_insensitive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_squared_epsilon_insensitive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_thread_multi_class_and_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_core_gridsearch_and_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_SGDClassifier_fit_for_all_backends", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_sparse_coordinate_descent", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_warns", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.linear_model import Lasso", - "from sklearn.linear_model import ElasticNet", - "from sklearn.linear_model import LassoCV", - "from sklearn.linear_model import ElasticNetCV" - ], - "classes": [], - "functions": [ - { - "name": "test_sparse_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_normalize_option", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_toy_list_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_toy_explicit_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "make_sparse_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_sparse_enet_not_as_toy_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_enet_not_as_toy_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_lasso_not_as_toy_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_multitarget", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_path_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_same_output_sparse_dense_lasso_and_enet_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_same_multiple_output_sparse_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_enet_coordinate_descent", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that a warning is issued if model does not converge" - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_theil_sen", - "imports": [ - "import os", - "import sys", - "from contextlib import contextmanager", - "import numpy as np", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_array_less", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_warns", - "from scipy.linalg import norm", - "from scipy.optimize import fmin_bfgs", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import TheilSenRegressor", - "from sklearn.linear_model._theil_sen import _spatial_median", - "from sklearn.linear_model._theil_sen import _breakdown_point", - "from sklearn.linear_model._theil_sen import _modified_weiszfeld_step", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raises" - ], - "classes": [], - "functions": [ - { - "name": "no_stdout_stderr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "gen_toy_problem_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "gen_toy_problem_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "gen_toy_problem_4d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_modweiszfeld_step_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_modweiszfeld_step_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_spatial_median_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spatial_median_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_theil_sen_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_theil_sen_1d_no_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_theil_sen_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calc_breakdown_point", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checksubparams_negative_subpopulation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checksubparams_too_few_subsamples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checksubparams_too_many_subsamples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checksubparams_n_subsamples_if_less_samples_than_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_subpopulation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_subsamples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_verbosity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_theil_sen_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_less_samples_than_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.linear_model._glm.glm", - "imports": [ - "import numbers", - "import numpy as np", - "import scipy.optimize", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from utils import check_array", - "from utils import check_X_y", - "from utils.optimize import _check_optimize_result", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from _loss.glm_distribution import ExponentialDispersionModel", - "from _loss.glm_distribution import TweedieDistribution", - "from _loss.glm_distribution import EDM_DISTRIBUTIONS", - "from link import BaseLink", - "from link import IdentityLink", - "from link import LogLink" - ], - "classes": [ - { - "name": "GeneralizedLinearRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the penalty term and thus determines the regularization strength. ``alpha = 0`` is equivalent to unpenalized GLMs. In this case, the design matrix `X` must have full column rank (no collinearities)." 
- }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X @ coef + intercept)." - }, - { - "name": "family", - "type": "Literal['normal', 'poisson', 'gamma', 'inverse-gaussian']", - "hasDefault": true, - "default": "'normal'", - "limitation": null, - "ignored": false, - "docstring": "The distributional assumption of the GLM, i.e. which distribution from the EDM, specifies the loss function to be minimized." - }, - { - "name": "link", - "type": "Literal['auto', 'identity', 'log']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The link function of the GLM, i.e. mapping from linear predictor `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets the link depending on the chosen family as follows: - 'identity' for Normal distribution - 'log' for Poisson, Gamma and Inverse Gaussian distributions" - }, - { - "name": "solver", - "type": "Literal['lbfgs']", - "hasDefault": true, - "default": "'lbfgs'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm to use in the optimization problem: 'lbfgs' Calls scipy's L-BFGS-B optimizer." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximal number of iterations for the solver." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion. For the lbfgs solver, the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol`` where ``g_j`` is the j-th component of the gradient (derivative) of the objective function." 
- }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If set to ``True``, reuse the solution of the previous call to ``fit`` as initialization for ``coef_`` and ``intercept_``." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "For the lbfgs solver set verbose to any positive number for verbosity." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit a Generalized Linear Model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "_linear_predictor", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the linear_predictor = `X @ coef_ + intercept_`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\ny_pred : array of shape (n_samples,)\n Returns predicted values of linear predictor." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using GLM with feature matrix X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\ny_pred : array of shape (n_samples,)\n Returns predicted values." - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True values of target." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute D^2, the percentage of deviance explained.\n\nD^2 is a generalization of the coefficient of determination R^2.\nR^2 uses squared error and D^2 deviance. Note that those two are equal\nfor ``family='normal'``.\n\nD^2 is defined as\n:math:`D^2 = 1-\\frac{D(y_{true},y_{pred})}{D_{null}}`,\n:math:`D_{null}` is the null deviance, i.e. 
the deviance of a model\nwith intercept alone, which corresponds to :math:`y_{pred} = \\bar{y}`.\nThe mean :math:`\\bar{y}` is averaged by sample_weight.\nBest possible score is 1.0 and it can be negative (because the model\ncan be arbitrarily worse).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Test samples.\n\ny : array-like of shape (n_samples,)\n True values of target.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n D^2 of self.predict(X) w.r.t. y." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Regression via a penalized Generalized Linear Model (GLM).\n\nGLMs based on a reproductive Exponential Dispersion Model (EDM) aim at\nfitting and predicting the mean of the target y as y_pred=h(X*w).\nTherefore, the fit minimizes the following objective function with L2\npriors as regularizer::\n\n 1/(2*sum(s)) * deviance(y, h(X*w); s)\n + 1/2 * alpha * |w|_2\n\nwith inverse link function h and s=sample_weight.\nThe parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23\n\nParameters\n----------\nalpha : float, default=1\n Constant that multiplies the penalty term and thus determines the\n regularization strength. ``alpha = 0`` is equivalent to unpenalized\n GLMs. In this case, the design matrix `X` must have full column rank\n (no collinearities).\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the linear predictor (X @ coef + intercept).\n\nfamily : {'normal', 'poisson', 'gamma', 'inverse-gaussian'} or an ExponentialDispersionModel instance, default='normal'\n The distributional assumption of the GLM, i.e. 
which distribution from\n the EDM, specifies the loss function to be minimized.\n\nlink : {'auto', 'identity', 'log'} or an instance of class BaseLink, default='auto'\n The link function of the GLM, i.e. mapping from linear predictor\n `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets\n the link depending on the chosen family as follows:\n\n - 'identity' for Normal distribution\n - 'log' for Poisson, Gamma and Inverse Gaussian distributions\n\nsolver : 'lbfgs', default='lbfgs'\n Algorithm to use in the optimization problem:\n\n 'lbfgs'\n Calls scipy's L-BFGS-B optimizer.\n\nmax_iter : int, default=100\n The maximal number of iterations for the solver.\n\ntol : float, default=1e-4\n Stopping criterion. For the lbfgs solver,\n the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n where ``g_j`` is the j-th component of the gradient (derivative) of\n the objective function.\n\nwarm_start : bool, default=False\n If set to ``True``, reuse the solution of the previous call to ``fit``\n as initialization for ``coef_`` and ``intercept_``.\n\nverbose : int, default=0\n For the lbfgs solver set verbose to any positive number for verbosity.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n Estimated coefficients for the linear predictor (`X @ coef_ +\n intercept_`) in the GLM.\n\nintercept_ : float\n Intercept (a.k.a. bias) added to linear predictor.\n\nn_iter_ : int\n Actual number of iterations used in the solver." - }, - { - "name": "PoissonRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the penalty term and thus determines the regularization strength. ``alpha = 0`` is equivalent to unpenalized GLMs. 
In this case, the design matrix `X` must have full column rank (no collinearities)." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X @ coef + intercept)." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximal number of iterations for the solver." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion. For the lbfgs solver, the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol`` where ``g_j`` is the j-th component of the gradient (derivative) of the objective function." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If set to ``True``, reuse the solution of the previous call to ``fit`` as initialization for ``coef_`` and ``intercept_`` ." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "For the lbfgs solver set verbose to any positive number for verbosity." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "family", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Generalized Linear Model with a Poisson distribution.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23\n\nParameters\n----------\nalpha : float, default=1\n Constant that multiplies the penalty term and thus determines the\n regularization strength. ``alpha = 0`` is equivalent to unpenalized\n GLMs. 
In this case, the design matrix `X` must have full column rank\n (no collinearities).\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the linear predictor (X @ coef + intercept).\n\nmax_iter : int, default=100\n The maximal number of iterations for the solver.\n\ntol : float, default=1e-4\n Stopping criterion. For the lbfgs solver,\n the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n where ``g_j`` is the j-th component of the gradient (derivative) of\n the objective function.\n\nwarm_start : bool, default=False\n If set to ``True``, reuse the solution of the previous call to ``fit``\n as initialization for ``coef_`` and ``intercept_`` .\n\nverbose : int, default=0\n For the lbfgs solver set verbose to any positive number for verbosity.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n Estimated coefficients for the linear predictor (`X @ coef_ +\n intercept_`) in the GLM.\n\nintercept_ : float\n Intercept (a.k.a. bias) added to linear predictor.\n\nn_iter_ : int\n Actual number of iterations used in the solver.\n\nExamples\n----------\n>>> from sklearn import linear_model\n>>> clf = linear_model.PoissonRegressor()\n>>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n>>> y = [12, 17, 22, 21]\n>>> clf.fit(X, y)\nPoissonRegressor()\n>>> clf.score(X, y)\n0.990...\n>>> clf.coef_\narray([0.121..., 0.158...])\n>>> clf.intercept_\n2.088...\n>>> clf.predict([[1, 1], [3, 4]])\narray([10.676..., 21.875...])" - }, - { - "name": "GammaRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the penalty term and thus determines the regularization strength. ``alpha = 0`` is equivalent to unpenalized GLMs. 
In this case, the design matrix `X` must have full column rank (no collinearities)." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X @ coef + intercept)." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximal number of iterations for the solver." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion. For the lbfgs solver, the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol`` where ``g_j`` is the j-th component of the gradient (derivative) of the objective function." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If set to ``True``, reuse the solution of the previous call to ``fit`` as initialization for ``coef_`` and ``intercept_`` ." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "For the lbfgs solver set verbose to any positive number for verbosity." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "family", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Generalized Linear Model with a Gamma distribution.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23\n\nParameters\n----------\nalpha : float, default=1\n Constant that multiplies the penalty term and thus determines the\n regularization strength. ``alpha = 0`` is equivalent to unpenalized\n GLMs. 
In this case, the design matrix `X` must have full column rank\n (no collinearities).\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the linear predictor (X @ coef + intercept).\n\nmax_iter : int, default=100\n The maximal number of iterations for the solver.\n\ntol : float, default=1e-4\n Stopping criterion. For the lbfgs solver,\n the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n where ``g_j`` is the j-th component of the gradient (derivative) of\n the objective function.\n\nwarm_start : bool, default=False\n If set to ``True``, reuse the solution of the previous call to ``fit``\n as initialization for ``coef_`` and ``intercept_`` .\n\nverbose : int, default=0\n For the lbfgs solver set verbose to any positive number for verbosity.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n Estimated coefficients for the linear predictor (`X * coef_ +\n intercept_`) in the GLM.\n\nintercept_ : float\n Intercept (a.k.a. 
bias) added to linear predictor.\n\nn_iter_ : int\n Actual number of iterations used in the solver.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.GammaRegressor()\n>>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n>>> y = [19, 26, 33, 30]\n>>> clf.fit(X, y)\nGammaRegressor()\n>>> clf.score(X, y)\n0.773...\n>>> clf.coef_\narray([0.072..., 0.066...])\n>>> clf.intercept_\n2.896...\n>>> clf.predict([[1, 0], [2, 8]])\narray([19.483..., 35.795...])" - }, - { - "name": "TweedieRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "power", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The power determines the underlying target distribution according to the following table: +-------+------------------------+ | Power | Distribution | +=======+========================+ | 0 | Normal | +-------+------------------------+ | 1 | Poisson | +-------+------------------------+ | (1,2) | Compound Poisson Gamma | +-------+------------------------+ | 2 | Gamma | +-------+------------------------+ | 3 | Inverse Gaussian | +-------+------------------------+ For ``0 < power < 1``, no distribution exists." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the penalty term and thus determines the regularization strength. ``alpha = 0`` is equivalent to unpenalized GLMs. In this case, the design matrix `X` must have full column rank (no collinearities)." - }, - { - "name": "link", - "type": "Literal['auto', 'identity', 'log']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The link function of the GLM, i.e. mapping from linear predictor `X @ coeff + intercept` to prediction `y_pred`. 
Option 'auto' sets the link depending on the chosen family as follows: - 'identity' for Normal distribution - 'log' for Poisson, Gamma and Inverse Gaussian distributions" - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X @ coef + intercept)." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximal number of iterations for the solver." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion. For the lbfgs solver, the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol`` where ``g_j`` is the j-th component of the gradient (derivative) of the objective function." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If set to ``True``, reuse the solution of the previous call to ``fit`` as initialization for ``coef_`` and ``intercept_`` ." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "For the lbfgs solver set verbose to any positive number for verbosity." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "family", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Generalized Linear Model with a Tweedie distribution.\n\nThis estimator can be used to model different GLMs depending on the\n``power`` parameter, which determines the underlying distribution.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.23\n\nParameters\n----------\npower : float, default=0\n The power determines the underlying target distribution according\n to the following table:\n\n +-------+------------------------+\n | Power | Distribution |\n +=======+========================+\n | 0 | Normal |\n +-------+------------------------+\n | 1 | Poisson |\n +-------+------------------------+\n | (1,2) | Compound Poisson Gamma |\n +-------+------------------------+\n | 2 | Gamma |\n +-------+------------------------+\n | 3 | Inverse Gaussian |\n +-------+------------------------+\n\n For ``0 < power < 1``, no distribution exists.\n\nalpha : float, default=1\n Constant that multiplies the penalty term and thus determines the\n regularization strength. ``alpha = 0`` is equivalent to unpenalized\n GLMs. In this case, the design matrix `X` must have full column rank\n (no collinearities).\n\nlink : {'auto', 'identity', 'log'}, default='auto'\n The link function of the GLM, i.e. mapping from linear predictor\n `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets\n the link depending on the chosen family as follows:\n\n - 'identity' for Normal distribution\n - 'log' for Poisson, Gamma and Inverse Gaussian distributions\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the linear predictor (X @ coef + intercept).\n\nmax_iter : int, default=100\n The maximal number of iterations for the solver.\n\ntol : float, default=1e-4\n Stopping criterion. 
For the lbfgs solver,\n the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n where ``g_j`` is the j-th component of the gradient (derivative) of\n the objective function.\n\nwarm_start : bool, default=False\n If set to ``True``, reuse the solution of the previous call to ``fit``\n as initialization for ``coef_`` and ``intercept_`` .\n\nverbose : int, default=0\n For the lbfgs solver set verbose to any positive number for verbosity.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n Estimated coefficients for the linear predictor (`X @ coef_ +\n intercept_`) in the GLM.\n\nintercept_ : float\n Intercept (a.k.a. bias) added to linear predictor.\n\nn_iter_ : int\n Actual number of iterations used in the solver.\n\nExamples\n----------\n>>> from sklearn import linear_model\n>>> clf = linear_model.TweedieRegressor()\n>>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n>>> y = [2, 3.5, 5, 5.5]\n>>> clf.fit(X, y)\nTweedieRegressor()\n>>> clf.score(X, y)\n0.839...\n>>> clf.coef_\narray([0.599..., 0.299...])\n>>> clf.intercept_\n1.600...\n>>> clf.predict([[1, 1], [3, 4]])\narray([2.500..., 4.599...])" - } - ], - "functions": [ - { - "name": "_safe_lin_pred", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the linear predictor taking care if intercept is present." - }, - { - "name": "_y_pred_deviance_derivative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute y_pred and the derivative of the deviance w.r.t coef." 
- } - ] - }, - { - "name": "sklearn.linear_model._glm.link", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numpy as np", - "from scipy.special import expit", - "from scipy.special import logit" - ], - "classes": [ - { - "name": "BaseLink", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Usually the (predicted) mean." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the link function g(y_pred).\n\nThe link function links the mean y_pred=E[Y] to the so called linear\npredictor (X*w), i.e. g(y_pred) = linear predictor.\n\nParameters\n----------\ny_pred : array of shape (n_samples,)\n Usually the (predicted) mean." - }, - { - "name": "derivative", - "decorators": [], - "parameters": [ - { - "name": "y_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Usually the (predicted) mean." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the derivative of the link g'(y_pred).\n\nParameters\n----------\ny_pred : array of shape (n_samples,)\n Usually the (predicted) mean." - }, - { - "name": "inverse", - "decorators": [], - "parameters": [ - { - "name": "lin_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Usually the (fitted) linear predictor." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the inverse link function h(lin_pred).\n\nGives the inverse relationship between linear predictor and the mean\ny_pred=E[Y], i.e. 
h(linear predictor) = y_pred.\n\nParameters\n----------\nlin_pred : array of shape (n_samples,)\n Usually the (fitted) linear predictor." - }, - { - "name": "inverse_derivative", - "decorators": [], - "parameters": [ - { - "name": "lin_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Usually the (fitted) linear predictor." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the derivative of the inverse link function h'(lin_pred).\n\nParameters\n----------\nlin_pred : array of shape (n_samples,)\n Usually the (fitted) linear predictor." - } - ], - "docstring": "Abstract base class for Link functions." - }, - { - "name": "IdentityLink", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "derivative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_derivative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "The identity link function g(x)=x." 
- }, - { - "name": "LogLink", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "derivative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_derivative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "The log link function g(x)=log(x)." - }, - { - "name": "LogitLink", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "derivative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_derivative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "The logit link function g(x)=logit(x)." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.linear_model._glm", - "imports": [ - "from glm import GeneralizedLinearRegressor", - "from glm import PoissonRegressor", - "from glm import GammaRegressor", - "from glm import TweedieRegressor" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.linear_model._glm.tests.test_glm", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_allclose", - "import pytest", - "import warnings", - "from sklearn.datasets import make_regression", - "from sklearn.linear_model._glm import GeneralizedLinearRegressor", - "from sklearn.linear_model import TweedieRegressor", - "from sklearn.linear_model import PoissonRegressor", - "from sklearn.linear_model import GammaRegressor", - "from sklearn.linear_model._glm.link import IdentityLink", - "from sklearn.linear_model._glm.link import LogLink", - "from sklearn._loss.glm_distribution import TweedieDistribution", - "from sklearn._loss.glm_distribution import NormalDistribution", - "from sklearn._loss.glm_distribution import PoissonDistribution", - "from sklearn._loss.glm_distribution import GammaDistribution", - "from sklearn._loss.glm_distribution import InverseGaussianDistribution", - "from sklearn.linear_model import Ridge", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.model_selection import train_test_split" - ], - "classes": [], - "functions": [ - { - "name": "regression_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weights_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test the raised errors in the validation of sample_weight." - }, - { - "name": "test_glm_family_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM family argument set as string." 
- }, - { - "name": "test_glm_link_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM link argument set as string." - }, - { - "name": "test_glm_link_auto", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_glm_alpha_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM for invalid alpha argument." - }, - { - "name": "test_glm_fit_intercept_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM for invalid fit_intercept argument." - }, - { - "name": "test_glm_solver_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM for invalid solver argument." - }, - { - "name": "test_glm_max_iter_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM for invalid max_iter argument." - }, - { - "name": "test_glm_tol_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM for invalid tol argument." - }, - { - "name": "test_glm_warm_start_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM for invalid warm_start argument." - }, - { - "name": "test_glm_identity_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM regression with identity link on a simple dataset." 
- }, - { - "name": "test_glm_sample_weight_consistentcy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that the impact of sample_weight is consistent" - }, - { - "name": "test_glm_log_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM regression with log link on a simple dataset." - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normal_ridge_comparison", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compare with Ridge regression for Normal distributions." - }, - { - "name": "test_poisson_glmnet", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compare Poisson regression with L2 regularization and LogLink to glmnet\n " - }, - { - "name": "test_convergence_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_poisson_regression_family", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gamma_regression_family", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tweedie_regression_family", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model._glm.tests.test_link", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_allclose", - "import pytest", - "from scipy.optimize import 
check_grad", - "from sklearn.linear_model._glm.link import IdentityLink", - "from sklearn.linear_model._glm.link import LogLink", - "from sklearn.linear_model._glm.link import LogitLink" - ], - "classes": [], - "functions": [ - { - "name": "test_link_properties", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test link inverse and derivative." - }, - { - "name": "test_link_derivative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model._glm.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.manifold.setup", - "imports": [ - "import os", - "import numpy", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.manifold._isomap", - "imports": [ - "import numpy as np", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from neighbors import NearestNeighbors", - "from neighbors import kneighbors_graph", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.graph import graph_shortest_path", - "from decomposition import KernelPCA", - "from preprocessing import KernelCenterer" - ], - "classes": [ - { - "name": "Isomap", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "number of neighbors to consider for each point." 
- }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "number of coordinates for the manifold" - }, - { - "name": "eigen_solver", - "type": "Literal['auto', 'arpack', 'dense']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "'auto' : Attempt to choose the most efficient solver for the given problem. 'arpack' : Use Arnoldi decomposition to find the eigenvalues and eigenvectors. 'dense' : Use a direct solver (i.e. LAPACK) for the eigenvalue decomposition." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Convergence tolerance passed to arpack or lobpcg. not used if eigen_solver == 'dense'." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for the arpack solver. not used if eigen_solver == 'dense'." - }, - { - "name": "path_method", - "type": "Literal['auto', 'FW', 'D']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Method to use in finding shortest path. 'auto' : attempt to choose the best algorithm automatically. 'FW' : Floyd-Warshall algorithm. 'D' : Dijkstra's algorithm." - }, - { - "name": "neighbors_algorithm", - "type": "Literal['auto', 'brute', 'kd_tree', 'ball_tree']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Algorithm to use for nearest neighbors search, passed to neighbors.NearestNeighbors instance." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. 
``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "metric", - "type": "str", - "hasDefault": true, - "default": "\"minkowski\"", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string or callable, it must be one of the options allowed by :func:`sklearn.metrics.pairwise_distances` for its metric parameter. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square. X may be a :term:`Glossary `. .. versionadded:: 0.22" - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Parameter for the Minkowski metric from sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used. .. versionadded:: 0.22" - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "reconstruction_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the reconstruction error for the embedding.\n\nReturns\n-------\nreconstruction_error : float\n\nNotes\n-----\nThe cost function of an isomap embedding is\n\n``E = frobenius_norm[K(D) - K(D_fit)] / n_samples``\n\nWhere D is the matrix of distances for the input data X,\nD_fit is the matrix of distances for the output embedding X_fit,\nand K is the isomap kernel:\n\n``K(D) = -0.5 * (I - 1/n_samples) * D^2 * (I - 1/n_samples)``" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample data, shape = (n_samples, n_features), in the form of a numpy array, sparse graph, precomputed tree, or NearestNeighbors object." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the embedding vectors for data X\n\nParameters\n----------\nX : {array-like, sparse graph, BallTree, KDTree, NearestNeighbors}\n Sample data, shape = (n_samples, n_features), in the form of a\n numpy array, sparse graph, precomputed tree, or NearestNeighbors\n object.\n\ny : Ignored\n\nReturns\n-------\nself : returns an instance of self." 
- }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model from data in X and transform X.\n\nParameters\n----------\nX : {array-like, sparse graph, BallTree, KDTree}\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If neighbors_algorithm='precomputed', X is assumed to be a distance matrix or a sparse graph of shape (n_queries, n_samples_fit)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X.\n\nThis is implemented by linking the points X into the graph of geodesic\ndistances of the training data. 
First the `n_neighbors` nearest\nneighbors of X are found in the training data, and from these the\nshortest geodesic distances from each point in X to each point in\nthe training data are computed in order to construct the kernel.\nThe embedding of X is the projection of this kernel onto the\nembedding vectors of the training set.\n\nParameters\n----------\nX : array-like, shape (n_queries, n_features)\n If neighbors_algorithm='precomputed', X is assumed to be a\n distance matrix or a sparse graph of shape\n (n_queries, n_samples_fit).\n\nReturns\n-------\nX_new : array-like, shape (n_queries, n_components)" - } - ], - "docstring": "Isomap Embedding\n\nNon-linear dimensionality reduction through Isometric Mapping\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_neighbors : int, default=5\n number of neighbors to consider for each point.\n\nn_components : int, default=2\n number of coordinates for the manifold\n\neigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n 'auto' : Attempt to choose the most efficient solver\n for the given problem.\n\n 'arpack' : Use Arnoldi decomposition to find the eigenvalues\n and eigenvectors.\n\n 'dense' : Use a direct solver (i.e. 
LAPACK)\n for the eigenvalue decomposition.\n\ntol : float, default=0\n Convergence tolerance passed to arpack or lobpcg.\n not used if eigen_solver == 'dense'.\n\nmax_iter : int, default=None\n Maximum number of iterations for the arpack solver.\n not used if eigen_solver == 'dense'.\n\npath_method : {'auto', 'FW', 'D'}, default='auto'\n Method to use in finding shortest path.\n\n 'auto' : attempt to choose the best algorithm automatically.\n\n 'FW' : Floyd-Warshall algorithm.\n\n 'D' : Dijkstra's algorithm.\n\nneighbors_algorithm : {'auto', 'brute', 'kd_tree', 'ball_tree'}, default='auto'\n Algorithm to use for nearest neighbors search,\n passed to neighbors.NearestNeighbors instance.\n\nn_jobs : int or None, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nmetric : string, or callable, default=\"minkowski\"\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n its metric parameter.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square. X may be a :term:`Glossary `.\n\n .. versionadded:: 0.22\n\np : int, default=2\n Parameter for the Minkowski metric from\n sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n .. versionadded:: 0.22\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n .. 
versionadded:: 0.22\n\nAttributes\n----------\nembedding_ : array-like, shape (n_samples, n_components)\n Stores the embedding vectors.\n\nkernel_pca_ : object\n :class:`~sklearn.decomposition.KernelPCA` object used to implement the\n embedding.\n\nnbrs_ : sklearn.neighbors.NearestNeighbors instance\n Stores nearest neighbors instance, including BallTree or KDtree\n if applicable.\n\ndist_matrix_ : array-like, shape (n_samples, n_samples)\n Stores the geodesic distance matrix of training data.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import Isomap\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = Isomap(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)\n\nReferences\n----------\n\n.. [1] Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. A global geometric\n framework for nonlinear dimensionality reduction. Science 290 (5500)" - } - ], - "functions": [] - }, - { - "name": "sklearn.manifold._locally_linear", - "imports": [ - "import numpy as np", - "from scipy.linalg import eigh", - "from scipy.linalg import svd", - "from scipy.linalg import qr", - "from scipy.linalg import solve", - "from scipy.sparse import eye", - "from scipy.sparse import csr_matrix", - "from scipy.sparse.linalg import eigsh", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from base import _UnstableArchMixin", - "from utils import check_random_state", - "from utils import check_array", - "from utils._arpack import _init_arpack_v0", - "from utils.extmath import stable_cumsum", - "from utils.validation import check_is_fitted", - "from utils.validation import FLOAT_DTYPES", - "from utils.validation import _deprecate_positional_args", - "from neighbors import NearestNeighbors" - ], - "classes": [ - { - "name": "LocallyLinearEmbedding", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": 
[], - "parameters": [ - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "number of neighbors to consider for each point." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "number of coordinates for the manifold" - }, - { - "name": "reg", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "regularization constant, multiplies the trace of the local covariance matrix of the distances." - }, - { - "name": "eigen_solver", - "type": "Literal['auto', 'arpack', 'dense']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "auto : algorithm will attempt to choose the best method for input data arpack : use arnoldi iteration in shift-invert mode. For this method, M may be a dense matrix, sparse matrix, or general linear operator. Warning: ARPACK can be unstable for some problems. It is best to try several random seeds in order to check results. dense : use standard dense matrix operations for the eigenvalue decomposition. For this method, M must be an array or matrix type. This method should be avoided for large problems." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for 'arpack' method Not used if eigen_solver=='dense'." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "maximum number of iterations for the arpack solver. Not used if eigen_solver=='dense'." 
- }, - { - "name": "method", - "type": "Literal['standard', 'hessian', 'modified', 'ltsa']", - "hasDefault": true, - "default": "'standard'", - "limitation": null, - "ignored": false, - "docstring": "standard : use the standard locally linear embedding algorithm. see reference [1] hessian : use the Hessian eigenmap method. This method requires ``n_neighbors > n_components * (1 + (n_components + 1) / 2`` see reference [2] modified : use the modified locally linear embedding algorithm. see reference [3] ltsa : use local tangent space alignment algorithm see reference [4]" - }, - { - "name": "hessian_tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for Hessian eigenmapping method. Only used if ``method == 'hessian'``" - }, - { - "name": "modified_tol", - "type": "float", - "hasDefault": true, - "default": "1e-12", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for modified LLE method. Only used if ``method == 'modified'``" - }, - { - "name": "neighbors_algorithm", - "type": "Literal['auto', 'brute', 'kd_tree', 'ball_tree']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "algorithm to use for nearest neighbors search, passed to neighbors.NearestNeighbors instance" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator when ``eigen_solver`` == 'arpack'. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. 
See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "training set." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the embedding vectors for data X\n\nParameters\n----------\nX : array-like of shape [n_samples, n_features]\n training set.\n\ny : Ignored\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "training set." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the embedding vectors for data X and transform X.\n\nParameters\n----------\nX : array-like of shape [n_samples, n_features]\n training set.\n\ny : Ignored\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform new points into embedding space.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nX_new : array, shape = [n_samples, n_components]\n\nNotes\n-----\nBecause of scaling performed by this method, it is discouraged to use\nit together with methods that are not scale-invariant (like SVMs)" - } - ], - "docstring": "Locally Linear Embedding\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_neighbors : int, default=5\n number of neighbors to consider for each point.\n\nn_components : int, default=2\n number of coordinates for the manifold\n\nreg : float, default=1e-3\n regularization constant, multiplies the trace of the local covariance\n matrix of the distances.\n\neigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n auto : algorithm will attempt to choose the best method for input data\n\n arpack : use arnoldi iteration in shift-invert mode.\n For this method, M may be a dense matrix, sparse matrix,\n or general linear operator.\n Warning: ARPACK can be unstable for some problems. It is\n best to try several random seeds in order to check results.\n\n dense : use standard dense matrix operations for the eigenvalue\n decomposition. 
For this method, M must be an array\n or matrix type. This method should be avoided for\n large problems.\n\ntol : float, default=1e-6\n Tolerance for 'arpack' method\n Not used if eigen_solver=='dense'.\n\nmax_iter : int, default=100\n maximum number of iterations for the arpack solver.\n Not used if eigen_solver=='dense'.\n\nmethod : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'\n standard : use the standard locally linear embedding algorithm. see\n reference [1]\n hessian : use the Hessian eigenmap method. This method requires\n ``n_neighbors > n_components * (1 + (n_components + 1) / 2``\n see reference [2]\n modified : use the modified locally linear embedding algorithm.\n see reference [3]\n ltsa : use local tangent space alignment algorithm\n see reference [4]\n\nhessian_tol : float, default=1e-4\n Tolerance for Hessian eigenmapping method.\n Only used if ``method == 'hessian'``\n\nmodified_tol : float, default=1e-12\n Tolerance for modified LLE method.\n Only used if ``method == 'modified'``\n\nneighbors_algorithm : {'auto', 'brute', 'kd_tree', 'ball_tree'}, default='auto'\n algorithm to use for nearest neighbors search,\n passed to neighbors.NearestNeighbors instance\n\nrandom_state : int, RandomState instance, default=None\n Determines the random number generator when\n ``eigen_solver`` == 'arpack'. Pass an int for reproducible results\n across multiple function calls. See :term: `Glossary `.\n\nn_jobs : int or None, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nembedding_ : array-like, shape [n_samples, n_components]\n Stores the embedding vectors\n\nreconstruction_error_ : float\n Reconstruction error associated with `embedding_`\n\nnbrs_ : NearestNeighbors object\n Stores nearest neighbors instance, including BallTree or KDtree\n if applicable.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import LocallyLinearEmbedding\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = LocallyLinearEmbedding(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)\n\nReferences\n----------\n\n.. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction\n by locally linear embedding. Science 290:2323 (2000).\n.. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally\n linear embedding techniques for high-dimensional data.\n Proc Natl Acad Sci U S A. 100:5591 (2003).\n.. [3] Zhang, Z. & Wang, J. MLLE: Modified Locally Linear\n Embedding Using Multiple Weights.\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382\n.. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear\n dimensionality reduction via tangent space alignment.\n Journal of Shanghai Univ. 
8:406 (2004)" - } - ], - "functions": [ - { - "name": "barycenter_weights", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "indices", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of the points in Y used to compute the barycenter" - }, - { - "name": "reg", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "amount of regularization to add for the problem to be well-posed in the case of n_neighbors > n_dim" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute barycenter weights of X from Y along the first axis\n\nWe estimate the weights to assign to each point in Y[indices] to recover\nthe point X[i]. The barycenter weights sum to 1.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_dim)\n\nY : array-like, shape (n_samples, n_dim)\n\nindices : array-like, shape (n_samples, n_dim)\n Indices of the points in Y used to compute the barycenter\n\nreg : float, default=1e-3\n amount of regularization to add for the problem to be\n well-posed in the case of n_neighbors > n_dim\n\nReturns\n-------\nB : array-like, shape (n_samples, n_neighbors)\n\nNotes\n-----\nSee developers note for more information." - }, - { - "name": "barycenter_kneighbors_graph", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample data, shape = (n_samples, n_features), in the form of a numpy array or a NearestNeighbors object." 
- }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors for each sample." - }, - { - "name": "reg", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Amount of regularization when solving the least-squares problem. Only relevant if mode='barycenter'. If None, use the default." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Computes the barycenter weighted graph of k-Neighbors for points in X\n\nParameters\n----------\nX : {array-like, NearestNeighbors}\n Sample data, shape = (n_samples, n_features), in the form of a\n numpy array or a NearestNeighbors object.\n\nn_neighbors : int\n Number of neighbors for each sample.\n\nreg : float, default=1e-3\n Amount of regularization when solving the least-squares\n problem. Only relevant if mode='barycenter'. If None, use the\n default.\n\nn_jobs : int or None, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nReturns\n-------\nA : sparse matrix in CSR format, shape = [n_samples, n_samples]\n A[i, j] is assigned the weight of edge that connects i to j.\n\nSee Also\n--------\nsklearn.neighbors.kneighbors_graph\nsklearn.neighbors.radius_neighbors_graph" - }, - { - "name": "null_space", - "decorators": [], - "parameters": [ - { - "name": "M", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input covariance matrix: should be symmetric positive semi-definite" - }, - { - "name": "k", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of eigenvalues/vectors to return" - }, - { - "name": "k_skip", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Number of low eigenvalues to skip." - }, - { - "name": "eigen_solver", - "type": "Literal['auto', 'arpack', 'dense']", - "hasDefault": true, - "default": "'arpack'", - "limitation": null, - "ignored": false, - "docstring": "auto : algorithm will attempt to choose the best method for input data arpack : use arnoldi iteration in shift-invert mode. For this method, M may be a dense matrix, sparse matrix, or general linear operator. Warning: ARPACK can be unstable for some problems. It is best to try several random seeds in order to check results. dense : use standard dense matrix operations for the eigenvalue decomposition. For this method, M must be an array or matrix type. This method should be avoided for large problems." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for 'arpack' method. Not used if eigen_solver=='dense'." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for 'arpack' method. Not used if eigen_solver=='dense'" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator when ``solver`` == 'arpack'. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the null space of a matrix M.\n\nParameters\n----------\nM : {array, matrix, sparse matrix, LinearOperator}\n Input covariance matrix: should be symmetric positive semi-definite\n\nk : int\n Number of eigenvalues/vectors to return\n\nk_skip : int, default=1\n Number of low eigenvalues to skip.\n\neigen_solver : {'auto', 'arpack', 'dense'}, default='arpack'\n auto : algorithm will attempt to choose the best method for input data\n arpack : use arnoldi iteration in shift-invert mode.\n For this method, M may be a dense matrix, sparse matrix,\n or general linear operator.\n Warning: ARPACK can be unstable for some problems. It is\n best to try several random seeds in order to check results.\n dense : use standard dense matrix operations for the eigenvalue\n decomposition. For this method, M must be an array\n or matrix type. This method should be avoided for\n large problems.\n\ntol : float, default=1e-6\n Tolerance for 'arpack' method.\n Not used if eigen_solver=='dense'.\n\nmax_iter : int, default=100\n Maximum number of iterations for 'arpack' method.\n Not used if eigen_solver=='dense'\n\nrandom_state : int, RandomState instance, default=None\n Determines the random number generator when ``solver`` == 'arpack'.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `." 
- }, - { - "name": "locally_linear_embedding", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample data, shape = (n_samples, n_features), in the form of a numpy array or a NearestNeighbors object." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "number of neighbors to consider for each point." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "number of coordinates for the manifold." - }, - { - "name": "reg", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "regularization constant, multiplies the trace of the local covariance matrix of the distances." - }, - { - "name": "eigen_solver", - "type": "Literal['auto', 'arpack', 'dense']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "auto : algorithm will attempt to choose the best method for input data arpack : use arnoldi iteration in shift-invert mode. For this method, M may be a dense matrix, sparse matrix, or general linear operator. Warning: ARPACK can be unstable for some problems. It is best to try several random seeds in order to check results. dense : use standard dense matrix operations for the eigenvalue decomposition. For this method, M must be an array or matrix type. This method should be avoided for large problems." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for 'arpack' method Not used if eigen_solver=='dense'." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "maximum number of iterations for the arpack solver." - }, - { - "name": "method", - "type": "Literal['standard', 'hessian', 'modified', 'ltsa']", - "hasDefault": true, - "default": "'standard'", - "limitation": null, - "ignored": false, - "docstring": "standard : use the standard locally linear embedding algorithm. see reference [1]_ hessian : use the Hessian eigenmap method. This method requires n_neighbors > n_components * (1 + (n_components + 1) / 2. see reference [2]_ modified : use the modified locally linear embedding algorithm. see reference [3]_ ltsa : use local tangent space alignment algorithm see reference [4]_" - }, - { - "name": "hessian_tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for Hessian eigenmapping method. Only used if method == 'hessian'" - }, - { - "name": "modified_tol", - "type": "float", - "hasDefault": true, - "default": "1e-12", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for modified LLE method. Only used if method == 'modified'" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator when ``solver`` == 'arpack'. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform a Locally Linear Embedding analysis on the data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, NearestNeighbors}\n Sample data, shape = (n_samples, n_features), in the form of a\n numpy array or a NearestNeighbors object.\n\nn_neighbors : int\n number of neighbors to consider for each point.\n\nn_components : int\n number of coordinates for the manifold.\n\nreg : float, default=1e-3\n regularization constant, multiplies the trace of the local covariance\n matrix of the distances.\n\neigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n auto : algorithm will attempt to choose the best method for input data\n\n arpack : use arnoldi iteration in shift-invert mode.\n For this method, M may be a dense matrix, sparse matrix,\n or general linear operator.\n Warning: ARPACK can be unstable for some problems. It is\n best to try several random seeds in order to check results.\n\n dense : use standard dense matrix operations for the eigenvalue\n decomposition. For this method, M must be an array\n or matrix type. This method should be avoided for\n large problems.\n\ntol : float, default=1e-6\n Tolerance for 'arpack' method\n Not used if eigen_solver=='dense'.\n\nmax_iter : int, default=100\n maximum number of iterations for the arpack solver.\n\nmethod : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'\n standard : use the standard locally linear embedding algorithm.\n see reference [1]_\n hessian : use the Hessian eigenmap method. 
This method requires\n n_neighbors > n_components * (1 + (n_components + 1) / 2.\n see reference [2]_\n modified : use the modified locally linear embedding algorithm.\n see reference [3]_\n ltsa : use local tangent space alignment algorithm\n see reference [4]_\n\nhessian_tol : float, default=1e-4\n Tolerance for Hessian eigenmapping method.\n Only used if method == 'hessian'\n\nmodified_tol : float, default=1e-12\n Tolerance for modified LLE method.\n Only used if method == 'modified'\n\nrandom_state : int, RandomState instance, default=None\n Determines the random number generator when ``solver`` == 'arpack'.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nn_jobs : int or None, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nReturns\n-------\nY : array-like, shape [n_samples, n_components]\n Embedding vectors.\n\nsquared_error : float\n Reconstruction error for the embedding vectors. Equivalent to\n ``norm(Y - W Y, 'fro')**2``, where W are the reconstruction weights.\n\nReferences\n----------\n\n.. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction\n by locally linear embedding. Science 290:2323 (2000).\n.. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally\n linear embedding techniques for high-dimensional data.\n Proc Natl Acad Sci U S A. 100:5591 (2003).\n.. [3] Zhang, Z. & Wang, J. MLLE: Modified Locally Linear\n Embedding Using Multiple Weights.\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382\n.. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear\n dimensionality reduction via tangent space alignment.\n Journal of Shanghai Univ. 
8:406 (2004)" - } - ] - }, - { - "name": "sklearn.manifold._mds", - "imports": [ - "import numpy as np", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "import warnings", - "from base import BaseEstimator", - "from metrics import euclidean_distances", - "from utils import check_random_state", - "from utils import check_array", - "from utils import check_symmetric", - "from isotonic import IsotonicRegression", - "from utils.validation import _deprecate_positional_args", - "from utils.deprecation import deprecated", - "from utils.fixes import delayed" - ], - "classes": [ - { - "name": "MDS", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of dimensions in which to immerse the dissimilarities." - }, - { - "name": "metric", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, perform metric MDS; otherwise, perform nonmetric MDS." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "4", - "limitation": null, - "ignored": false, - "docstring": "Number of times the SMACOF algorithm will be run with different initializations. The final results will be the best output of the runs, determined by the run with the smallest final stress." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations of the SMACOF algorithm for a single run." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Level of verbosity." 
- }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance with respect to stress at which to declare convergence." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. If multiple initializations are used (``n_init``), each run of the algorithm is computed in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator used to initialize the centers. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - }, - { - "name": "dissimilarity", - "type": "Literal['euclidean', 'precomputed']", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "Dissimilarity measure to use: - 'euclidean': Pairwise Euclidean distances between points in the dataset. - 'precomputed': Pre-computed dissimilarities are passed directly to ``fit`` and ``fit_transform``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. 
If ``dissimilarity=='precomputed'``, the input should be the dissimilarity matrix." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - }, - { - "name": "init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Starting configuration of the embedding to initialize the SMACOF algorithm. By default, the algorithm is initialized with a randomly chosen array." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the position of the points in the embedding space.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or (n_samples, n_samples)\n Input data. If ``dissimilarity=='precomputed'``, the input should\n be the dissimilarity matrix.\n\ny : Ignored\n\ninit : ndarray of shape (n_samples,), default=None\n Starting configuration of the embedding to initialize the SMACOF\n algorithm. By default, the algorithm is initialized with a randomly\n chosen array." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. If ``dissimilarity=='precomputed'``, the input should be the dissimilarity matrix." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - }, - { - "name": "init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Starting configuration of the embedding to initialize the SMACOF algorithm. By default, the algorithm is initialized with a randomly chosen array." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the data from X, and returns the embedded coordinates.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or (n_samples, n_samples)\n Input data. If ``dissimilarity=='precomputed'``, the input should\n be the dissimilarity matrix.\n\ny : Ignored\n\ninit : ndarray of shape (n_samples,), default=None\n Starting configuration of the embedding to initialize the SMACOF\n algorithm. By default, the algorithm is initialized with a randomly\n chosen array." - } - ], - "docstring": "Multidimensional scaling.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=2\n Number of dimensions in which to immerse the dissimilarities.\n\nmetric : bool, default=True\n If ``True``, perform metric MDS; otherwise, perform nonmetric MDS.\n\nn_init : int, default=4\n Number of times the SMACOF algorithm will be run with different\n initializations. The final results will be the best output of the runs,\n determined by the run with the smallest final stress.\n\nmax_iter : int, default=300\n Maximum number of iterations of the SMACOF algorithm for a single run.\n\nverbose : int, default=0\n Level of verbosity.\n\neps : float, default=1e-3\n Relative tolerance with respect to stress at which to declare\n convergence.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. If multiple\n initializations are used (``n_init``), each run of the algorithm is\n computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\ndissimilarity : {'euclidean', 'precomputed'}, default='euclidean'\n Dissimilarity measure to use:\n\n - 'euclidean':\n Pairwise Euclidean distances between points in the dataset.\n\n - 'precomputed':\n Pre-computed dissimilarities are passed directly to ``fit`` and\n ``fit_transform``.\n\nAttributes\n----------\nembedding_ : ndarray of shape (n_samples, n_components)\n Stores the position of the dataset in the embedding space.\n\nstress_ : float\n The final value of the stress (sum of squared distance of the\n disparities and the distances for all constrained points).\n\ndissimilarity_matrix_ : ndarray of shape (n_samples, n_samples)\n Pairwise dissimilarities between the points. Symmetric matrix that:\n\n - either uses a custom dissimilarity matrix by setting `dissimilarity`\n to 'precomputed';\n - or constructs a dissimilarity matrix from data using\n Euclidean distances.\n\nn_iter_ : int\n The number of iterations corresponding to the best stress.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import MDS\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = MDS(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)\n\nReferences\n----------\n\"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\nGroenen P. Springer Series in Statistics (1997)\n\n\"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\nPsychometrika, 29 (1964)\n\n\"Multidimensional scaling by optimizing goodness of fit to a nonmetric\nhypothesis\" Kruskal, J. 
Psychometrika, 29, (1964)" - } - ], - "functions": [ - { - "name": "_smacof_single", - "decorators": [], - "parameters": [ - { - "name": "dissimilarities", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pairwise dissimilarities between the points. Must be symmetric." - }, - { - "name": "metric", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Compute metric or nonmetric SMACOF algorithm." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of dimensions in which to immerse the dissimilarities. If an ``init`` array is provided, this option is overridden and the shape of ``init`` is used to determine the dimensionality of the embedding space." - }, - { - "name": "init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Starting configuration of the embedding to initialize the algorithm. By default, the algorithm is initialized with a randomly chosen array." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations of the SMACOF algorithm for a single run." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Level of verbosity." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance with respect to stress at which to declare convergence." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator used to initialize the centers. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes multidimensional scaling using SMACOF algorithm.\n\nParameters\n----------\ndissimilarities : ndarray of shape (n_samples, n_samples)\n Pairwise dissimilarities between the points. Must be symmetric.\n\nmetric : bool, default=True\n Compute metric or nonmetric SMACOF algorithm.\n\nn_components : int, default=2\n Number of dimensions in which to immerse the dissimilarities. If an\n ``init`` array is provided, this option is overridden and the shape of\n ``init`` is used to determine the dimensionality of the embedding\n space.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n Starting configuration of the embedding to initialize the algorithm. By\n default, the algorithm is initialized with a randomly chosen array.\n\nmax_iter : int, default=300\n Maximum number of iterations of the SMACOF algorithm for a single run.\n\nverbose : int, default=0\n Level of verbosity.\n\neps : float, default=1e-3\n Relative tolerance with respect to stress at which to declare\n convergence.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_components)\n Coordinates of the points in a ``n_components``-space.\n\nstress : float\n The final value of the stress (sum of squared distance of the\n disparities and the distances for all constrained points).\n\nn_iter : int\n The number of iterations corresponding to the best stress." 
- }, - { - "name": "smacof", - "decorators": [], - "parameters": [ - { - "name": "dissimilarities", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pairwise dissimilarities between the points. Must be symmetric." - }, - { - "name": "metric", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Compute metric or nonmetric SMACOF algorithm." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of dimensions in which to immerse the dissimilarities. If an ``init`` array is provided, this option is overridden and the shape of ``init`` is used to determine the dimensionality of the embedding space." - }, - { - "name": "init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Starting configuration of the embedding to initialize the algorithm. By default, the algorithm is initialized with a randomly chosen array." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "8", - "limitation": null, - "ignored": false, - "docstring": "Number of times the SMACOF algorithm will be run with different initializations. The final results will be the best output of the runs, determined by the run with the smallest final stress. If ``init`` is provided, this option is overridden and a single run is performed." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. If multiple initializations are used (``n_init``), each run of the algorithm is computed in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations of the SMACOF algorithm for a single run." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Level of verbosity." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance with respect to stress at which to declare convergence." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator used to initialize the centers. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes multidimensional scaling using the SMACOF algorithm.\n\nThe SMACOF (Scaling by MAjorizing a COmplicated Function) algorithm is a\nmultidimensional scaling algorithm which minimizes an objective function\n(the *stress*) using a majorization technique. Stress majorization, also\nknown as the Guttman Transform, guarantees a monotone convergence of\nstress, and is more powerful than traditional techniques such as gradient\ndescent.\n\nThe SMACOF algorithm for metric MDS can summarized by the following steps:\n\n1. Set an initial start configuration, randomly or not.\n2. Compute the stress\n3. Compute the Guttman Transform\n4. 
Iterate 2 and 3 until convergence.\n\nThe nonmetric algorithm adds a monotonic regression step before computing\nthe stress.\n\nParameters\n----------\ndissimilarities : ndarray of shape (n_samples, n_samples)\n Pairwise dissimilarities between the points. Must be symmetric.\n\nmetric : bool, default=True\n Compute metric or nonmetric SMACOF algorithm.\n\nn_components : int, default=2\n Number of dimensions in which to immerse the dissimilarities. If an\n ``init`` array is provided, this option is overridden and the shape of\n ``init`` is used to determine the dimensionality of the embedding\n space.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n Starting configuration of the embedding to initialize the algorithm. By\n default, the algorithm is initialized with a randomly chosen array.\n\nn_init : int, default=8\n Number of times the SMACOF algorithm will be run with different\n initializations. The final results will be the best output of the runs,\n determined by the run with the smallest final stress. If ``init`` is\n provided, this option is overridden and a single run is performed.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. If multiple\n initializations are used (``n_init``), each run of the algorithm is\n computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nmax_iter : int, default=300\n Maximum number of iterations of the SMACOF algorithm for a single run.\n\nverbose : int, default=0\n Level of verbosity.\n\neps : float, default=1e-3\n Relative tolerance with respect to stress at which to declare\n convergence.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_components)\n Coordinates of the points in a ``n_components``-space.\n\nstress : float\n The final value of the stress (sum of squared distance of the\n disparities and the distances for all constrained points).\n\nn_iter : int\n The number of iterations corresponding to the best stress. Returned\n only if ``return_n_iter`` is set to ``True``.\n\nNotes\n-----\n\"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\nGroenen P. Springer Series in Statistics (1997)\n\n\"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\nPsychometrika, 29 (1964)\n\n\"Multidimensional scaling by optimizing goodness of fit to a nonmetric\nhypothesis\" Kruskal, J. 
Psychometrika, 29, (1964)" - } - ] - }, - { - "name": "sklearn.manifold._spectral_embedding", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy import sparse", - "from scipy.linalg import eigh", - "from scipy.sparse.linalg import eigsh", - "from scipy.sparse.csgraph import connected_components", - "from scipy.sparse.csgraph import laplacian as csgraph_laplacian", - "from base import BaseEstimator", - "from utils import check_array", - "from utils import check_random_state", - "from utils import check_symmetric", - "from utils._arpack import _init_arpack_v0", - "from utils.extmath import _deterministic_vector_sign_flip", - "from utils.fixes import lobpcg", - "from metrics.pairwise import rbf_kernel", - "from neighbors import kneighbors_graph", - "from neighbors import NearestNeighbors", - "from utils.validation import _deprecate_positional_args", - "from utils.deprecation import deprecated", - "from pyamg import smoothed_aggregation_solver" - ], - "classes": [ - { - "name": "SpectralEmbedding", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The dimension of the projected subspace." - }, - { - "name": "affinity", - "type": "Literal['nearest_neighbors', 'rbf', 'precomputed', 'precomputed_nearest_neighbors']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "How to construct the affinity matrix. - 'nearest_neighbors' : construct the affinity matrix by computing a graph of nearest neighbors. - 'rbf' : construct the affinity matrix by computing a radial basis function (RBF) kernel. - 'precomputed' : interpret ``X`` as a precomputed affinity matrix. 
- 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph of precomputed nearest neighbors, and constructs the affinity matrix by selecting the ``n_neighbors`` nearest neighbors. - callable : use passed in function as affinity the function takes in data matrix (n_samples, n_features) and return affinity matrix (n_samples, n_samples)." - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Kernel coefficient for rbf kernel. If None, gamma will be set to 1/n_features." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator used for the initialization of the lobpcg eigenvectors when ``solver`` == 'amg'. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - }, - { - "name": "eigen_solver", - "type": "Literal['arpack', 'lobpcg', 'amg']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The eigenvalue decomposition strategy to use. AMG requires pyamg to be installed. It can be faster on very large, sparse problems. If None, then ``'arpack'`` is used." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of nearest neighbors for nearest_neighbors graph building. If None, n_neighbors will be set to max(n_samples/10, 1)." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_affinity_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Calculate the affinity matrix from data\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n If affinity is \"precomputed\"\n X : array-like of shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\nY: Ignored\n\nReturns\n-------\naffinity_matrix of shape (n_samples, n_samples)" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features. If affinity is \"precomputed\" X : {array-like, sparse matrix}, shape (n_samples, n_samples), Interpret X as precomputed adjacency graph computed from samples." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n If affinity is \"precomputed\"\n X : {array-like, sparse matrix}, shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features. If affinity is \"precomputed\" X : {array-like, sparse matrix} of shape (n_samples, n_samples), Interpret X as precomputed adjacency graph computed from samples." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model from data in X and transform X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n If affinity is \"precomputed\"\n X : {array-like, sparse matrix} of shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\ny : Ignored\n\nReturns\n-------\nX_new : array-like of shape (n_samples, n_components)" - } - ], - "docstring": "Spectral embedding for non-linear dimensionality reduction.\n\nForms an affinity matrix given by the specified function and\napplies spectral decomposition to the corresponding graph laplacian.\nThe resulting transformation is given by the value of the\neigenvectors for each data point.\n\nNote : Laplacian Eigenmaps is the actual algorithm implemented here.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=2\n The dimension of the projected subspace.\n\naffinity : {'nearest_neighbors', 'rbf', 'precomputed', 'precomputed_nearest_neighbors'} or callable, default='nearest_neighbors'\n How to construct the affinity matrix.\n - 'nearest_neighbors' : construct the affinity matrix by computing a\n graph of nearest neighbors.\n - 'rbf' : construct the affinity matrix by computing a radial basis\n function (RBF) kernel.\n - 'precomputed' : interpret ``X`` as a precomputed affinity matrix.\n - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph\n of precomputed nearest neighbors, and constructs the affinity matrix\n by selecting the ``n_neighbors`` nearest neighbors.\n - callable : use passed in function as affinity\n the function takes in data matrix (n_samples, n_features)\n and 
return affinity matrix (n_samples, n_samples).\n\ngamma : float, default=None\n Kernel coefficient for rbf kernel. If None, gamma will be set to\n 1/n_features.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator used for the initialization of\n the lobpcg eigenvectors when ``solver`` == 'amg'. Pass an int for\n reproducible results across multiple function calls.\n See :term: `Glossary `.\n\neigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems.\n If None, then ``'arpack'`` is used.\n\nn_neighbors : int, default=None\n Number of nearest neighbors for nearest_neighbors graph building.\n If None, n_neighbors will be set to max(n_samples/10, 1).\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nembedding_ : ndarray of shape (n_samples, n_components)\n Spectral embedding of the training matrix.\n\naffinity_matrix_ : ndarray of shape (n_samples, n_samples)\n Affinity_matrix constructed from samples or precomputed.\n\nn_neighbors_ : int\n Number of nearest neighbors effectively used.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import SpectralEmbedding\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = SpectralEmbedding(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)\n\nReferences\n----------\n\n- A Tutorial on Spectral Clustering, 2007\n Ulrike von Luxburg\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323\n\n- On Spectral Clustering: Analysis and an algorithm, 2001\n Andrew Y. Ng, Michael I. 
Jordan, Yair Weiss\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.19.8100\n\n- Normalized cuts and image segmentation, 2000\n Jianbo Shi, Jitendra Malik\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324" - } - ], - "functions": [ - { - "name": "_graph_connected_component", - "decorators": [], - "parameters": [ - { - "name": "graph", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Adjacency matrix of the graph, non-zero weight means an edge between the nodes." - }, - { - "name": "node_id", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index of the query node of the graph." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the largest graph connected components that contains one\ngiven node.\n\nParameters\n----------\ngraph : array-like of shape (n_samples, n_samples)\n Adjacency matrix of the graph, non-zero weight means an edge\n between the nodes.\n\nnode_id : int\n The index of the query node of the graph.\n\nReturns\n-------\nconnected_components_matrix : array-like of shape (n_samples,)\n An array of bool value indicating the indexes of the nodes\n belonging to the largest connected components of the given query\n node." - }, - { - "name": "_graph_is_connected", - "decorators": [], - "parameters": [ - { - "name": "graph", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Adjacency matrix of the graph, non-zero weight means an edge between the nodes." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return whether the graph is connected (True) or Not (False).\n\nParameters\n----------\ngraph : {array-like, sparse matrix} of shape (n_samples, n_samples)\n Adjacency matrix of the graph, non-zero weight means an edge\n between the nodes.\n\nReturns\n-------\nis_connected : bool\n True means the graph is fully connected and False means not." - }, - { - "name": "_set_diag", - "decorators": [], - "parameters": [ - { - "name": "laplacian", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The graph laplacian." - }, - { - "name": "value", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The value of the diagonal." - }, - { - "name": "norm_laplacian", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether the value of the diagonal should be changed or not." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set the diagonal of the laplacian matrix and convert it to a\nsparse format well suited for eigenvalue decomposition.\n\nParameters\n----------\nlaplacian : {ndarray, sparse matrix}\n The graph laplacian.\n\nvalue : float\n The value of the diagonal.\n\nnorm_laplacian : bool\n Whether the value of the diagonal should be changed or not.\n\nReturns\n-------\nlaplacian : {array, sparse matrix}\n An array of matrix in a form that is well suited to fast\n eigenvalue decomposition, depending on the band width of the\n matrix." - }, - { - "name": "spectral_embedding", - "decorators": [], - "parameters": [ - { - "name": "adjacency", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The adjacency matrix of the graph to embed." 
- }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "8", - "limitation": null, - "ignored": false, - "docstring": "The dimension of the projection subspace." - }, - { - "name": "eigen_solver", - "type": "Literal['arpack', 'lobpcg', 'amg']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The eigenvalue decomposition strategy to use. AMG requires pyamg to be installed. It can be faster on very large, sparse problems, but may also lead to instabilities. If None, then ``'arpack'`` is used." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator used for the initialization of the lobpcg eigenvectors decomposition when ``solver`` == 'amg'. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - }, - { - "name": "eigen_tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion for eigendecomposition of the Laplacian matrix when using arpack eigen_solver." - }, - { - "name": "norm_laplacian", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, then compute normalized Laplacian." - }, - { - "name": "drop_first", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to drop the first eigenvector. For spectral embedding, this should be True as the first eigenvector should be constant vector for connected graph, but for spectral clustering, this should be kept as False to retain the first eigenvector." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Project the sample on the first eigenvectors of the graph Laplacian.\n\nThe adjacency matrix is used to compute a normalized graph Laplacian\nwhose spectrum (especially the eigenvectors associated to the\nsmallest eigenvalues) has an interpretation in terms of minimal\nnumber of cuts necessary to split the graph into comparably sized\ncomponents.\n\nThis embedding can also 'work' even if the ``adjacency`` variable is\nnot strictly the adjacency matrix of a graph but more generally\nan affinity or similarity matrix between samples (for instance the\nheat kernel of a euclidean distance matrix or a k-NN matrix).\n\nHowever care must taken to always make the affinity matrix symmetric\nso that the eigenvector decomposition works as expected.\n\nNote : Laplacian Eigenmaps is the actual algorithm implemented here.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nadjacency : {array-like, sparse graph} of shape (n_samples, n_samples)\n The adjacency matrix of the graph to embed.\n\nn_components : int, default=8\n The dimension of the projection subspace.\n\neigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems,\n but may also lead to instabilities. If None, then ``'arpack'`` is\n used.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator used for the initialization of\n the lobpcg eigenvectors decomposition when ``solver`` == 'amg'. 
Pass\n an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\neigen_tol : float, default=0.0\n Stopping criterion for eigendecomposition of the Laplacian matrix\n when using arpack eigen_solver.\n\nnorm_laplacian : bool, default=True\n If True, then compute normalized Laplacian.\n\ndrop_first : bool, default=True\n Whether to drop the first eigenvector. For spectral embedding, this\n should be True as the first eigenvector should be constant vector for\n connected graph, but for spectral clustering, this should be kept as\n False to retain the first eigenvector.\n\nReturns\n-------\nembedding : ndarray of shape (n_samples, n_components)\n The reduced samples.\n\nNotes\n-----\nSpectral Embedding (Laplacian Eigenmaps) is most useful when the graph\nhas one connected component. If there graph has many components, the first\nfew eigenvectors will simply uncover the connected components of the graph.\n\nReferences\n----------\n* https://en.wikipedia.org/wiki/LOBPCG\n\n* Toward the Optimal Preconditioned Eigensolver: Locally Optimal\n Block Preconditioned Conjugate Gradient Method\n Andrew V. 
Knyazev\n https://doi.org/10.1137%2FS1064827500366124" - } - ] - }, - { - "name": "sklearn.manifold._t_sne", - "imports": [ - "import warnings", - "from time import time", - "import numpy as np", - "from scipy import linalg", - "from scipy.spatial.distance import pdist", - "from scipy.spatial.distance import squareform", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import issparse", - "from neighbors import NearestNeighbors", - "from base import BaseEstimator", - "from utils import check_random_state", - "from utils._openmp_helpers import _openmp_effective_n_threads", - "from utils.validation import check_non_negative", - "from utils.validation import _deprecate_positional_args", - "from decomposition import PCA", - "from metrics.pairwise import pairwise_distances", - "from None import _utils", - "from None import _barnes_hut_tsne" - ], - "classes": [ - { - "name": "TSNE", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Dimension of the embedded space." - }, - { - "name": "perplexity", - "type": "float", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "The perplexity is related to the number of nearest neighbors that is used in other manifold learning algorithms. Larger datasets usually require a larger perplexity. Consider selecting a value between 5 and 50. Different values can result in significantly different results." - }, - { - "name": "early_exaggeration", - "type": "float", - "hasDefault": true, - "default": "12", - "limitation": null, - "ignored": false, - "docstring": "Controls how tight natural clusters in the original space are in the embedded space and how much space will be between them. 
For larger values, the space between natural clusters will be larger in the embedded space. Again, the choice of this parameter is not very critical. If the cost function increases during initial optimization, the early exaggeration factor or the learning rate might be too high." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If the learning rate is too high, the data may look like a 'ball' with any point approximately equidistant from its nearest neighbours. If the learning rate is too low, most points may look compressed in a dense cloud with few outliers. If the cost function gets stuck in a bad local minimum increasing the learning rate may help." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for the optimization. Should be at least 250." - }, - { - "name": "n_iter_without_progress", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations without progress before we abort the optimization, used after 250 initial iterations with early exaggeration. Note that progress is only checked every 50 iterations so this value is rounded to the next multiple of 50. .. versionadded:: 0.17 parameter *n_iter_without_progress* to control stopping criteria." - }, - { - "name": "min_grad_norm", - "type": "float", - "hasDefault": true, - "default": "1e-7", - "limitation": null, - "ignored": false, - "docstring": "If the gradient norm is below this threshold, the optimization will be stopped." 
- }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options allowed by scipy.spatial.distance.pdist for its metric parameter, or a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS. If metric is \"precomputed\", X is assumed to be a distance matrix. Alternatively, if metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays from X as input and return a value indicating the distance between them. The default is \"euclidean\" which is interpreted as squared euclidean distance." - }, - { - "name": "init", - "type": "Literal['random', 'pca']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initialization of embedding. Possible options are 'random', 'pca', and a numpy array of shape (n_samples, n_components). PCA initialization cannot be used with precomputed distances and is usually more globally stable than random initialization." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator. Pass an int for reproducible results across multiple function calls. Note that different initializations might result in different local minima of the cost function. See :term: `Glossary `." 
- }, - { - "name": "method", - "type": "str", - "hasDefault": true, - "default": "'barnes_hut'", - "limitation": null, - "ignored": false, - "docstring": "By default the gradient calculation algorithm uses Barnes-Hut approximation running in O(NlogN) time. method='exact' will run on the slower, but exact, algorithm in O(N^2) time. The exact algorithm should be used when nearest-neighbor errors need to be better than 3%. However, the exact method cannot scale to millions of examples. .. versionadded:: 0.17 Approximate optimization *method* via the Barnes-Hut." - }, - { - "name": "angle", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Only used if method='barnes_hut' This is the trade-off between speed and accuracy for Barnes-Hut T-SNE. 'angle' is the angular size (referred to as theta in [3]) of a distant node as measured from a point. If this size is below 'angle' then it is used as a summary node of all points contained within it. This method is not very sensitive to changes in this parameter in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing computation time and angle greater 0.8 has quickly increasing error." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. This parameter has no impact when ``metric=\"precomputed\"`` or (``metric=\"euclidean\"`` and ``method=\"exact\"``). ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionadded:: 0.22" - }, - { - "name": "square_distances", - "type": "Literal[True, 'legacy']", - "hasDefault": true, - "default": "'legacy'", - "limitation": null, - "ignored": false, - "docstring": "Whether TSNE should square the distance values. 
``'legacy'`` means that distance values are squared only when ``metric=\"euclidean\"``. ``True`` means that distance values are squared for all metrics. .. versionadded:: 0.24 Added to provide backward compatibility during deprecation of legacy squaring behavior. .. deprecated:: 0.24 Legacy squaring behavior was deprecated in 0.24. The ``'legacy'`` value will be removed in 1.1 (renaming of 0.26), at which point the default value will change to ``True``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function to fit the model using X as training data." - }, - { - "name": "_tsne", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Runs t-SNE." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If the metric is 'precomputed' X must be a square distance matrix. Otherwise it contains a sample per row. If the method is 'exact', X may be a sparse matrix of type 'csr', 'csc' or 'coo'. If the method is 'barnes_hut' and the metric is 'precomputed', X may be a precomputed sparse graph." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit X into an embedded space and return that transformed\noutput.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n If the metric is 'precomputed' X must be a square distance\n matrix. Otherwise it contains a sample per row. If the method\n is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n or 'coo'. 
If the method is 'barnes_hut' and the metric is\n 'precomputed', X may be a precomputed sparse graph.\n\ny : Ignored\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Embedding of the training data in low-dimensional space." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If the metric is 'precomputed' X must be a square distance matrix. Otherwise it contains a sample per row. If the method is 'exact', X may be a sparse matrix of type 'csr', 'csc' or 'coo'. If the method is 'barnes_hut' and the metric is 'precomputed', X may be a precomputed sparse graph." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit X into an embedded space.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n If the metric is 'precomputed' X must be a square distance\n matrix. Otherwise it contains a sample per row. If the method\n is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n or 'coo'. If the method is 'barnes_hut' and the metric is\n 'precomputed', X may be a precomputed sparse graph.\n\ny : Ignored" - } - ], - "docstring": "t-distributed Stochastic Neighbor Embedding.\n\nt-SNE [1] is a tool to visualize high-dimensional data. It converts\nsimilarities between data points to joint probabilities and tries\nto minimize the Kullback-Leibler divergence between the joint\nprobabilities of the low-dimensional embedding and the\nhigh-dimensional data. t-SNE has a cost function that is not convex,\ni.e. with different initializations we can get different results.\n\nIt is highly recommended to use another dimensionality reduction\nmethod (e.g. 
PCA for dense data or TruncatedSVD for sparse data)\nto reduce the number of dimensions to a reasonable amount (e.g. 50)\nif the number of features is very high. This will suppress some\nnoise and speed up the computation of pairwise distances between\nsamples. For more tips see Laurens van der Maaten's FAQ [2].\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=2\n Dimension of the embedded space.\n\nperplexity : float, default=30.0\n The perplexity is related to the number of nearest neighbors that\n is used in other manifold learning algorithms. Larger datasets\n usually require a larger perplexity. Consider selecting a value\n between 5 and 50. Different values can result in significantly\n different results.\n\nearly_exaggeration : float, default=12.0\n Controls how tight natural clusters in the original space are in\n the embedded space and how much space will be between them. For\n larger values, the space between natural clusters will be larger\n in the embedded space. Again, the choice of this parameter is not\n very critical. If the cost function increases during initial\n optimization, the early exaggeration factor or the learning rate\n might be too high.\n\nlearning_rate : float, default=200.0\n The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\n the learning rate is too high, the data may look like a 'ball' with any\n point approximately equidistant from its nearest neighbours. If the\n learning rate is too low, most points may look compressed in a dense\n cloud with few outliers. If the cost function gets stuck in a bad local\n minimum increasing the learning rate may help.\n\nn_iter : int, default=1000\n Maximum number of iterations for the optimization. Should be at\n least 250.\n\nn_iter_without_progress : int, default=300\n Maximum number of iterations without progress before we abort the\n optimization, used after 250 initial iterations with early\n exaggeration. 
Note that progress is only checked every 50 iterations so\n this value is rounded to the next multiple of 50.\n\n .. versionadded:: 0.17\n parameter *n_iter_without_progress* to control stopping criteria.\n\nmin_grad_norm : float, default=1e-7\n If the gradient norm is below this threshold, the optimization will\n be stopped.\n\nmetric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by scipy.spatial.distance.pdist for its metric parameter, or\n a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a distance matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays from X as input and return a value indicating\n the distance between them. The default is \"euclidean\" which is\n interpreted as squared euclidean distance.\n\ninit : {'random', 'pca'} or ndarray of shape (n_samples, n_components), default='random'\n Initialization of embedding. Possible options are 'random', 'pca',\n and a numpy array of shape (n_samples, n_components).\n PCA initialization cannot be used with precomputed distances and is\n usually more globally stable than random initialization.\n\nverbose : int, default=0\n Verbosity level.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator. Pass an int for reproducible\n results across multiple function calls. Note that different\n initializations might result in different local minima of the cost\n function. See :term: `Glossary `.\n\nmethod : str, default='barnes_hut'\n By default the gradient calculation algorithm uses Barnes-Hut\n approximation running in O(NlogN) time. method='exact'\n will run on the slower, but exact, algorithm in O(N^2) time. 
The\n exact algorithm should be used when nearest-neighbor errors need\n to be better than 3%. However, the exact method cannot scale to\n millions of examples.\n\n .. versionadded:: 0.17\n Approximate optimization *method* via the Barnes-Hut.\n\nangle : float, default=0.5\n Only used if method='barnes_hut'\n This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.\n 'angle' is the angular size (referred to as theta in [3]) of a distant\n node as measured from a point. If this size is below 'angle' then it is\n used as a summary node of all points contained within it.\n This method is not very sensitive to changes in this parameter\n in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing\n computation time and angle greater 0.8 has quickly increasing error.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search. This parameter\n has no impact when ``metric=\"precomputed\"`` or\n (``metric=\"euclidean\"`` and ``method=\"exact\"``).\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.22\n\nsquare_distances : True or 'legacy', default='legacy'\n Whether TSNE should square the distance values. ``'legacy'`` means\n that distance values are squared only when ``metric=\"euclidean\"``.\n ``True`` means that distance values are squared for all metrics.\n\n .. versionadded:: 0.24\n Added to provide backward compatibility during deprecation of\n legacy squaring behavior.\n .. deprecated:: 0.24\n Legacy squaring behavior was deprecated in 0.24. 
The ``'legacy'``\n value will be removed in 1.1 (renaming of 0.26), at which point the\n default value will change to ``True``.\n\nAttributes\n----------\nembedding_ : array-like of shape (n_samples, n_components)\n Stores the embedding vectors.\n\nkl_divergence_ : float\n Kullback-Leibler divergence after optimization.\n\nn_iter_ : int\n Number of iterations run.\n\nExamples\n--------\n\n>>> import numpy as np\n>>> from sklearn.manifold import TSNE\n>>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])\n>>> X_embedded = TSNE(n_components=2).fit_transform(X)\n>>> X_embedded.shape\n(4, 2)\n\nReferences\n----------\n\n[1] van der Maaten, L.J.P.; Hinton, G.E. Visualizing High-Dimensional Data\n Using t-SNE. Journal of Machine Learning Research 9:2579-2605, 2008.\n\n[2] van der Maaten, L.J.P. t-Distributed Stochastic Neighbor Embedding\n https://lvdmaaten.github.io/tsne/\n\n[3] L.J.P. van der Maaten. Accelerating t-SNE using Tree-Based Algorithms.\n Journal of Machine Learning Research 15(Oct):3221-3245, 2014.\n https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf" - } - ], - "functions": [ - { - "name": "_joint_probabilities", - "decorators": [], - "parameters": [ - { - "name": "distances", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Distances of samples are stored as condensed matrices, i.e. we omit the diagonal and duplicate entries and store everything in a one-dimensional array." - }, - { - "name": "desired_perplexity", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Desired perplexity of the joint probability distributions." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute joint probabilities p_ij from distances.\n\nParameters\n----------\ndistances : ndarray of shape (n_samples * (n_samples-1) / 2,)\n Distances of samples are stored as condensed matrices, i.e.\n we omit the diagonal and duplicate entries and store everything\n in a one-dimensional array.\n\ndesired_perplexity : float\n Desired perplexity of the joint probability distributions.\n\nverbose : int\n Verbosity level.\n\nReturns\n-------\nP : ndarray of shape (n_samples * (n_samples-1) / 2,)\n Condensed joint probability matrix." - }, - { - "name": "_joint_probabilities_nn", - "decorators": [], - "parameters": [ - { - "name": "distances", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Distances of samples to its n_neighbors nearest neighbors. All other distances are left to zero (and are not materialized in memory). Matrix should be of CSR format." - }, - { - "name": "desired_perplexity", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Desired perplexity of the joint probability distributions." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute joint probabilities p_ij from distances using just nearest\nneighbors.\n\nThis method is approximately equal to _joint_probabilities. The latter\nis O(N), but limiting the joint probability to nearest neighbors improves\nthis substantially to O(uN).\n\nParameters\n----------\ndistances : sparse matrix of shape (n_samples, n_samples)\n Distances of samples to its n_neighbors nearest neighbors. 
All other\n distances are left to zero (and are not materialized in memory).\n Matrix should be of CSR format.\n\ndesired_perplexity : float\n Desired perplexity of the joint probability distributions.\n\nverbose : int\n Verbosity level.\n\nReturns\n-------\nP : sparse matrix of shape (n_samples, n_samples)\n Condensed joint probability matrix with only nearest neighbors. Matrix\n will be of CSR format." - }, - { - "name": "_kl_divergence", - "decorators": [], - "parameters": [ - { - "name": "params", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Unraveled embedding." - }, - { - "name": "P", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Condensed joint probability matrix." - }, - { - "name": "degrees_of_freedom", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Degrees of freedom of the Student's-t distribution." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimension of the embedded space." - }, - { - "name": "skip_num_points", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "This does not compute the gradient for points with indices below `skip_num_points`. This is useful when computing transforms of new data where you'd like to keep the old data fixed." - }, - { - "name": "compute_error: bool", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If False, the kl_divergence is not computed and returns NaN." 
- }, - { - "name": "default=True", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If False, the kl_divergence is not computed and returns NaN." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "t-SNE objective function: gradient of the KL divergence\nof p_ijs and q_ijs and the absolute error.\n\nParameters\n----------\nparams : ndarray of shape (n_params,)\n Unraveled embedding.\n\nP : ndarray of shape (n_samples * (n_samples-1) / 2,)\n Condensed joint probability matrix.\n\ndegrees_of_freedom : int\n Degrees of freedom of the Student's-t distribution.\n\nn_samples : int\n Number of samples.\n\nn_components : int\n Dimension of the embedded space.\n\nskip_num_points : int, default=0\n This does not compute the gradient for points with indices below\n `skip_num_points`. This is useful when computing transforms of new\n data where you'd like to keep the old data fixed.\n\ncompute_error: bool, default=True\n If False, the kl_divergence is not computed and returns NaN.\n\nReturns\n-------\nkl_divergence : float\n Kullback-Leibler divergence of p_ij and q_ij.\n\ngrad : ndarray of shape (n_params,)\n Unraveled gradient of the Kullback-Leibler divergence with respect to\n the embedding." - }, - { - "name": "_kl_divergence_bh", - "decorators": [], - "parameters": [ - { - "name": "params", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Unraveled embedding." - }, - { - "name": "P", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sparse approximate joint probability matrix, computed only for the k nearest-neighbors and symmetrized. Matrix should be of CSR format." 
- }, - { - "name": "degrees_of_freedom", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Degrees of freedom of the Student's-t distribution." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimension of the embedded space." - }, - { - "name": "angle", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "This is the trade-off between speed and accuracy for Barnes-Hut T-SNE. 'angle' is the angular size (referred to as theta in [3]) of a distant node as measured from a point. If this size is below 'angle' then it is used as a summary node of all points contained within it. This method is not very sensitive to changes in this parameter in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing computation time and angle greater 0.8 has quickly increasing error." - }, - { - "name": "skip_num_points", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "This does not compute the gradient for points with indices below `skip_num_points`. This is useful when computing transforms of new data where you'd like to keep the old data fixed." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." - }, - { - "name": "compute_error: bool", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If False, the kl_divergence is not computed and returns NaN." 
- }, - { - "name": "default=True", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If False, the kl_divergence is not computed and returns NaN." - }, - { - "name": "num_threads", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Number of threads used to compute the gradient. This is set here to avoid calling _openmp_effective_n_threads for each gradient step." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "t-SNE objective function: KL divergence of p_ijs and q_ijs.\n\nUses Barnes-Hut tree methods to calculate the gradient that\nruns in O(NlogN) instead of O(N^2).\n\nParameters\n----------\nparams : ndarray of shape (n_params,)\n Unraveled embedding.\n\nP : sparse matrix of shape (n_samples, n_sample)\n Sparse approximate joint probability matrix, computed only for the\n k nearest-neighbors and symmetrized. Matrix should be of CSR format.\n\ndegrees_of_freedom : int\n Degrees of freedom of the Student's-t distribution.\n\nn_samples : int\n Number of samples.\n\nn_components : int\n Dimension of the embedded space.\n\nangle : float, default=0.5\n This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.\n 'angle' is the angular size (referred to as theta in [3]) of a distant\n node as measured from a point. If this size is below 'angle' then it is\n used as a summary node of all points contained within it.\n This method is not very sensitive to changes in this parameter\n in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing\n computation time and angle greater 0.8 has quickly increasing error.\n\nskip_num_points : int, default=0\n This does not compute the gradient for points with indices below\n `skip_num_points`. 
This is useful when computing transforms of new\n data where you'd like to keep the old data fixed.\n\nverbose : int, default=False\n Verbosity level.\n\ncompute_error: bool, default=True\n If False, the kl_divergence is not computed and returns NaN.\n\nnum_threads : int, default=1\n Number of threads used to compute the gradient. This is set here to\n avoid calling _openmp_effective_n_threads for each gradient step.\n\nReturns\n-------\nkl_divergence : float\n Kullback-Leibler divergence of p_ij and q_ij.\n\ngrad : ndarray of shape (n_params,)\n Unraveled gradient of the Kullback-Leibler divergence with respect to\n the embedding." - }, - { - "name": "_gradient_descent", - "decorators": [], - "parameters": [ - { - "name": "objective", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Should return a tuple of cost and gradient for a given parameter vector. When expensive to compute, the cost can optionally be None and can be computed every n_iter_check steps using the objective_error function." - }, - { - "name": "p0", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial parameter vector." - }, - { - "name": "it", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Current number of iterations (this function will be called more than once during the optimization)." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of gradient descent iterations." - }, - { - "name": "n_iter_check", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations before evaluating the global error. If the error is sufficiently low, we abort the optimization." 
- }, - { - "name": "n_iter_without_progress", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations without progress before we abort the optimization." - }, - { - "name": "momentum", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The momentum generates a weight for previous gradients that decays exponentially." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If the learning rate is too high, the data may look like a 'ball' with any point approximately equidistant from its nearest neighbours. If the learning rate is too low, most points may look compressed in a dense cloud with few outliers." - }, - { - "name": "min_gain", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Minimum individual gain for each parameter." - }, - { - "name": "min_grad_norm", - "type": "float", - "hasDefault": true, - "default": "1e-7", - "limitation": null, - "ignored": false, - "docstring": "If the gradient norm is below this threshold, the optimization will be aborted." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." - }, - { - "name": "args", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Arguments to pass to objective function." - }, - { - "name": "kwargs", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Keyword arguments to pass to objective function." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Batch gradient descent with momentum and individual gains.\n\nParameters\n----------\nobjective : callable\n Should return a tuple of cost and gradient for a given parameter\n vector. When expensive to compute, the cost can optionally\n be None and can be computed every n_iter_check steps using\n the objective_error function.\n\np0 : array-like of shape (n_params,)\n Initial parameter vector.\n\nit : int\n Current number of iterations (this function will be called more than\n once during the optimization).\n\nn_iter : int\n Maximum number of gradient descent iterations.\n\nn_iter_check : int, default=1\n Number of iterations before evaluating the global error. If the error\n is sufficiently low, we abort the optimization.\n\nn_iter_without_progress : int, default=300\n Maximum number of iterations without progress before we abort the\n optimization.\n\nmomentum : float within (0.0, 1.0), default=0.8\n The momentum generates a weight for previous gradients that decays\n exponentially.\n\nlearning_rate : float, default=200.0\n The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\n the learning rate is too high, the data may look like a 'ball' with any\n point approximately equidistant from its nearest neighbours. If the\n learning rate is too low, most points may look compressed in a dense\n cloud with few outliers.\n\nmin_gain : float, default=0.01\n Minimum individual gain for each parameter.\n\nmin_grad_norm : float, default=1e-7\n If the gradient norm is below this threshold, the optimization will\n be aborted.\n\nverbose : int, default=0\n Verbosity level.\n\nargs : sequence, default=None\n Arguments to pass to objective function.\n\nkwargs : dict, default=None\n Keyword arguments to pass to objective function.\n\nReturns\n-------\np : ndarray of shape (n_params,)\n Optimum parameters.\n\nerror : float\n Optimum.\n\ni : int\n Last iteration." 
- }, - { - "name": "trustworthiness", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If the metric is 'precomputed' X must be a square distance matrix. Otherwise it contains a sample per row." - }, - { - "name": "X_embedded", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Embedding of the training data in low-dimensional space." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors k that will be considered." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "Which metric to use for computing pairwise distances between samples from the original input space. If metric is 'precomputed', X must be a matrix of pairwise distances or squared distances. Otherwise, see the documentation of argument metric in sklearn.pairwise.pairwise_distances for a list of available metrics. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Expresses to what extent the local structure is retained.\n\nThe trustworthiness is within [0, 1]. It is defined as\n\n.. math::\n\n T(k) = 1 - \\frac{2}{nk (2n - 3k - 1)} \\sum^n_{i=1}\n \\sum_{j \\in \\mathcal{N}_{i}^{k}} \\max(0, (r(i, j) - k))\n\nwhere for each sample i, :math:`\\mathcal{N}_{i}^{k}` are its k nearest\nneighbors in the output space, and every sample j is its :math:`r(i, j)`-th\nnearest neighbor in the input space. In other words, any unexpected nearest\nneighbors in the output space are penalised in proportion to their rank in\nthe input space.\n\n* \"Neighborhood Preservation in Nonlinear Projection Methods: An\n Experimental Study\"\n J. Venna, S. 
Kaski\n* \"Learning a Parametric Embedding by Preserving Local Structure\"\n L.J.P. van der Maaten\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n If the metric is 'precomputed' X must be a square distance\n matrix. Otherwise it contains a sample per row.\n\nX_embedded : ndarray of shape (n_samples, n_components)\n Embedding of the training data in low-dimensional space.\n\nn_neighbors : int, default=5\n Number of neighbors k that will be considered.\n\nmetric : str or callable, default='euclidean'\n Which metric to use for computing pairwise distances between samples\n from the original input space. If metric is 'precomputed', X must be a\n matrix of pairwise distances or squared distances. Otherwise, see the\n documentation of argument metric in sklearn.pairwise.pairwise_distances\n for a list of available metrics.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ntrustworthiness : float\n Trustworthiness of the low-dimensional embedding." 
- } - ] - }, - { - "name": "sklearn.manifold", - "imports": [ - "from _locally_linear import locally_linear_embedding", - "from _locally_linear import LocallyLinearEmbedding", - "from _isomap import Isomap", - "from _mds import MDS", - "from _mds import smacof", - "from _spectral_embedding import SpectralEmbedding", - "from _spectral_embedding import spectral_embedding", - "from _t_sne import TSNE", - "from _t_sne import trustworthiness" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.manifold.tests.test_isomap", - "imports": [ - "from itertools import product", - "import numpy as np", - "from numpy.testing import assert_almost_equal", - "from numpy.testing import assert_array_almost_equal", - "import pytest", - "from sklearn import datasets", - "from sklearn import manifold", - "from sklearn import neighbors", - "from sklearn import pipeline", - "from sklearn import preprocessing", - "from scipy.sparse import rand as sparse_rand" - ], - "classes": [], - "functions": [ - { - "name": "test_isomap_simple_grid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isomap_reconstruction_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_with_nearest_neighbors_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_different_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isomap_clone_bug", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.manifold.tests.test_locally_linear", - "imports": [ - "from itertools import product", - "import numpy as np", - "from numpy.testing import assert_almost_equal", - "from numpy.testing import assert_array_almost_equal", - "from scipy import linalg", - "import pytest", - "from sklearn import neighbors", - "from sklearn import manifold", - "from sklearn.manifold._locally_linear import barycenter_kneighbors_graph", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn import pipeline", - "from sklearn import datasets" - ], - "classes": [], - "functions": [ - { - "name": "test_barycenter_kneighbors_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lle_simple_grid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lle_manifold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lle_init_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_singular_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_integer_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - 
] - }, - { - "name": "sklearn.manifold.tests.test_mds", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_array_almost_equal", - "import pytest", - "from sklearn.manifold import _mds as mds", - "from sklearn.utils._testing import ignore_warnings" - ], - "classes": [], - "functions": [ - { - "name": "test_smacof", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_smacof_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_MDS", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_MDS_pairwise_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_MDS_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.manifold.tests.test_spectral_embedding", - "imports": [ - "import pytest", - "import numpy as np", - "from scipy import sparse", - "from scipy.sparse import csgraph", - "from scipy.linalg import eigh", - "from sklearn.manifold import SpectralEmbedding", - "from sklearn.manifold._spectral_embedding import _graph_is_connected", - "from sklearn.manifold._spectral_embedding import _graph_connected_component", - "from sklearn.manifold import spectral_embedding", - "from sklearn.metrics.pairwise import rbf_kernel", - "from sklearn.metrics import normalized_mutual_info_score", - "from sklearn.neighbors import NearestNeighbors", - "from sklearn.cluster import KMeans", - "from sklearn.datasets import make_blobs", - "from sklearn.utils.extmath import _deterministic_vector_sign_flip", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal" - ], - 
"classes": [], - "functions": [ - { - "name": "_assert_equal_with_sign_flipping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check array A and B are equal with possible sign flipping on\neach columns" - }, - { - "name": "test_sparse_graph_connected_component", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_two_components", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_precomputed_affinity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precomputed_nearest_neighbors_filtering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_callable_affinity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_amg_solver", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_amg_solver_failure", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_spectral_clustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_unknown_eigensolver", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_unknown_affinity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_connectivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_deterministic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_unnormalized", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_first_eigen_vector", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_pairwise_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.manifold.tests.test_t_sne", - "imports": [ - "import sys", - "from io import StringIO", - "import numpy as np", - "from numpy.testing import assert_allclose", - "import scipy.sparse as sp", - "import pytest", - "from sklearn.neighbors import NearestNeighbors", - "from sklearn.neighbors import kneighbors_graph", - "from sklearn.exceptions import EfficiencyWarning", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import skip_if_32bit", - "from sklearn.utils import check_random_state", - "from sklearn.manifold._t_sne import _joint_probabilities", - "from sklearn.manifold._t_sne import _joint_probabilities_nn", - "from sklearn.manifold._t_sne import _kl_divergence", - "from sklearn.manifold._t_sne import _kl_divergence_bh", - "from sklearn.manifold._t_sne import _gradient_descent", - "from sklearn.manifold._t_sne import trustworthiness", - "from sklearn.manifold import TSNE", - "from sklearn.manifold import 
_barnes_hut_tsne", - "from sklearn.manifold._utils import _binary_search_perplexity", - "from sklearn.datasets import make_blobs", - "from scipy.optimize import check_grad", - "from scipy.spatial.distance import pdist", - "from scipy.spatial.distance import squareform", - "from sklearn.metrics.pairwise import pairwise_distances", - "from sklearn.metrics.pairwise import manhattan_distances", - "from sklearn.metrics.pairwise import cosine_distances", - "from scipy.sparse import csr_matrix" - ], - "classes": [], - "functions": [ - { - "name": "test_gradient_descent_stops", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_binary_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_binary_search_neighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_binary_perplexity_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_trustworthiness", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_preserve_trustworthiness_approximately", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_optimization_minimizes_kl_divergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "t-SNE should give a lower KL divergence with more iterations." 
- }, - { - "name": "test_fit_csr_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_preserve_trustworthiness_approximately_with_precomputed_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_trustworthiness_not_euclidean_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_exaggeration_too_small", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_too_few_iterations", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bad_precomputed_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_exact_no_precomputed_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_high_perplexity_precomputed_sparse_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_precomputed_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure that TSNE works identically for sparse and dense matrix" - }, - { - "name": "test_non_positive_computed_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_not_available", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_ndarray", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_ndarray_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_distance_not_available", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_method_not_available", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_square_distances_not_available", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_angle_out_of_range_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_initialization_not_compatible_with_precomputed_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components_range", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_exaggeration_used", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_iter_used", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_answer_gradient_two_points", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_answer_gradient_four_points", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_skip_num_points_gradient", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "_run_answer_test", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_chebyshev_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_reduction_to_one_component", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_64bit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kl_divergence_not_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_barnes_hut_angle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_iter_without_progress", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_grad_norm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_accessible_kl_divergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_uniform_grid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure that TSNE can approximately recover a uniform 2D grid\n\nDue to ties in distances between point in X_2d_grid, this test is platform\ndependent for ``method='barnes_hut'`` due to numerical imprecision.\n\nAlso, t-SNE is not assured to converge to the right solution because bad\ninitialization 
can lead to convergence to bad local minimum (the\noptimization problem is non-convex). To avoid breaking the test too often,\nwe re-run t-SNE from the final point when the convergence is not good\nenough." - }, - { - "name": "assert_uniform_grid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bh_match_exact", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_bh_multithread_match_sequential", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tsne_with_different_distance_metrics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure that TSNE works for different distance metrics" - }, - { - "name": "test_tsne_different_square_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tsne_square_distances_futurewarning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tsne_n_jobs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure that the n_jobs parameter doesn't impact the output" - } - ] - }, - { - "name": "sklearn.manifold.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.metrics.pairwise", - "imports": [ - "import itertools", - "from functools import partial", - "import warnings", - "import numpy as np", - "from scipy.spatial import distance", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import issparse", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "from utils.validation import _num_samples", - "from utils.validation 
import check_non_negative", - "from utils import check_array", - "from utils import gen_even_slices", - "from utils import gen_batches", - "from utils import get_chunk_n_rows", - "from utils import is_scalar_nan", - "from utils.extmath import row_norms", - "from utils.extmath import safe_sparse_dot", - "from preprocessing import normalize", - "from utils._mask import _get_mask", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from utils.fixes import sp_version", - "from utils.fixes import parse_version", - "from _pairwise_fast import _chi2_kernel_fast", - "from _pairwise_fast import _sparse_manhattan", - "from exceptions import DataConversionWarning", - "from sklearn.neighbors import DistanceMetric", - "from gaussian_process.kernels import Kernel as GPKernel" - ], - "classes": [], - "functions": [ - { - "name": "_return_float_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "1. If dtype of X and Y is float32, then dtype float32 is returned.\n2. Else dtype float is returned." - }, - { - "name": "check_pairwise_arrays", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "precomputed", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "True if X is to be treated as precomputed distances to the samples in Y." - }, - { - "name": "dtype", - "type": "Union[List, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data type required for X and Y. If None, the dtype will be an appropriate float type selected by _return_float_dtype. .. 
versionadded:: 0.18" - }, - { - "name": "accept_sparse", - "type": "Union[bool, str]", - "hasDefault": true, - "default": "'csr'", - "limitation": null, - "ignored": false, - "docstring": "String[s] representing allowed sparse matrix formats, such as 'csc', 'csr', etc. If the input is sparse but not in the allowed format, it will be converted to the first listed format. True allows the input to be any format. False means that a sparse matrix input will raise an error." - }, - { - "name": "force_all_finite", - "type": "Union[Literal['allow-nan'], bool]", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise an error on np.inf, np.nan, pd.NA in array. The possibilities are: - True: Force all values of array to be finite. - False: accepts np.inf, np.nan, pd.NA in array. - 'allow-nan': accepts only np.nan and pd.NA values in array. Values cannot be infinite. .. versionadded:: 0.22 ``force_all_finite`` accepts the string ``'allow-nan'``. .. versionchanged:: 0.23 Accepts `pd.NA` and converts it into `np.nan`." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether a forced copy will be triggered. If copy=False, a copy might be triggered by a conversion. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set X and Y appropriately and checks inputs.\n\nIf Y is None, it is set as a pointer to X (i.e. not a copy).\nIf Y is given, this does not happen.\nAll distance metrics should use this function first to assert that the\ngiven parameters are correct and safe to use.\n\nSpecifically, this function first ensures that both X and Y are arrays,\nthen checks that they are at least two dimensional while ensuring that\ntheir elements are floats (or dtype if provided). 
Finally, the function\nchecks that the size of the second dimension of the two arrays is equal, or\nthe equivalent check for a precomputed distance matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n\nprecomputed : bool, default=False\n True if X is to be treated as precomputed distances to the samples in\n Y.\n\ndtype : str, type, list of type, default=None\n Data type required for X and Y. If None, the dtype will be an\n appropriate float type selected by _return_float_dtype.\n\n .. versionadded:: 0.18\n\naccept_sparse : str, bool or list/tuple of str, default='csr'\n String[s] representing allowed sparse matrix formats, such as 'csc',\n 'csr', etc. If the input is sparse but not in the allowed format,\n it will be converted to the first listed format. True allows the input\n to be any format. False means that a sparse matrix input will\n raise an error.\n\nforce_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in array. The\n possibilities are:\n\n - True: Force all values of array to be finite.\n - False: accepts np.inf, np.nan, pd.NA in array.\n - 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n cannot be infinite.\n\n .. versionadded:: 0.22\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`.\n\ncopy : bool, default=False\n Whether a forced copy will be triggered. If copy=False, a copy might\n be triggered by a conversion.\n\n .. versionadded:: 0.22\n\nReturns\n-------\nsafe_X : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n An array equal to X, guaranteed to be a numpy array.\n\nsafe_Y : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n An array equal to Y if Y was not None, guaranteed to be a numpy array.\n If Y was None, safe_Y will be a pointer to X." 
- }, - { - "name": "check_paired_arrays", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set X and Y appropriately and checks inputs for paired distances.\n\nAll paired distance metrics should use this function first to assert that\nthe given parameters are correct and safe to use.\n\nSpecifically, this function first ensures that both X and Y are arrays,\nthen checks that they are at least two dimensional while ensuring that\ntheir elements are floats. Finally, the function checks that the size\nof the dimensions of the two arrays are equal.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n\nReturns\n-------\nsafe_X : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n An array equal to X, guaranteed to be a numpy array.\n\nsafe_Y : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n An array equal to Y if Y was not None, guaranteed to be a numpy array.\n If Y was None, safe_Y will be a pointer to X." 
- }, - { - "name": "euclidean_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y_norm_squared", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pre-computed dot-products of vectors in Y (e.g., ``(Y**2).sum(axis=1)``) May be ignored in some cases, see the note below." - }, - { - "name": "squared", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Return squared Euclidean distances." - }, - { - "name": "X_norm_squared", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pre-computed dot-products of vectors in X (e.g., ``(X**2).sum(axis=1)``) May be ignored in some cases, see the note below." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Considering the rows of X (and Y=X) as vectors, compute the\ndistance matrix between each pair of vectors.\n\nFor efficiency reasons, the euclidean distance between a pair of row\nvector x and y is computed as::\n\n dist(x, y) = sqrt(dot(x, x) - 2 * dot(x, y) + dot(y, y))\n\nThis formulation has two advantages over other ways of computing distances.\nFirst, it is computationally efficient when dealing with sparse data.\nSecond, if one argument varies but the other remains unchanged, then\n`dot(x, x)` and/or `dot(y, y)` can be pre-computed.\n\nHowever, this is not the most precise way of doing this computation,\nbecause this equation potentially suffers from \"catastrophic cancellation\".\nAlso, the distance matrix returned by this function may not be exactly\nsymmetric as required by, e.g., ``scipy.spatial.distance`` functions.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features), default=None\n\nY_norm_squared : array-like of shape (n_samples_Y,), default=None\n Pre-computed dot-products of vectors in Y (e.g.,\n ``(Y**2).sum(axis=1)``)\n May be ignored in some cases, see the note below.\n\nsquared : bool, default=False\n Return squared Euclidean distances.\n\nX_norm_squared : array-like of shape (n_samples,), default=None\n Pre-computed dot-products of vectors in X (e.g.,\n ``(X**2).sum(axis=1)``)\n May be ignored in some cases, see the note below.\n\nNotes\n-----\nTo achieve better accuracy, `X_norm_squared`\u00a0and `Y_norm_squared` may be\nunused if they are passed as ``float32``.\n\nReturns\n-------\ndistances : ndarray of shape (n_samples_X, n_samples_Y)\n\nSee Also\n--------\npaired_distances : Distances betweens pairs of elements of X and Y.\n\nExamples\n--------\n>>> from sklearn.metrics.pairwise import euclidean_distances\n>>> X = 
[[0, 1], [1, 1]]\n>>> # distance between rows of X\n>>> euclidean_distances(X, X)\narray([[0., 1.],\n [1., 0.]])\n>>> # get distance to origin\n>>> euclidean_distances(X, [[0, 0]])\narray([[1. ],\n [1.41421356]])" - }, - { - "name": "nan_euclidean_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "squared", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Return squared Euclidean distances." - }, - { - "name": "missing_values", - "type": "int", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Representation of missing value." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Make and use a deep copy of X and Y (if Y exists)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate the euclidean distances in the presence of missing values.\n\nCompute the euclidean distance between each pair of samples in X and Y,\nwhere Y=X is assumed if Y=None. When calculating the distance between a\npair of samples, this formulation ignores feature coordinates with a\nmissing value in either sample and scales up the weight of the remaining\ncoordinates:\n\n dist(x,y) = sqrt(weight * sq. distance from present coordinates)\n where,\n weight = Total # of coordinates / # of present coordinates\n\nFor example, the distance between ``[3, na, na, 6]`` and ``[1, na, 4, 5]``\nis:\n\n .. 
math::\n \\sqrt{\\frac{4}{2}((3-1)^2 + (6-5)^2)}\n\nIf all the coordinates are missing or if there are no common present\ncoordinates then NaN is returned for that pair.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nX : array-like of shape=(n_samples_X, n_features)\n\nY : array-like of shape=(n_samples_Y, n_features), default=None\n\nsquared : bool, default=False\n Return squared Euclidean distances.\n\nmissing_values : np.nan or int, default=np.nan\n Representation of missing value.\n\ncopy : bool, default=True\n Make and use a deep copy of X and Y (if Y exists).\n\nReturns\n-------\ndistances : ndarray of shape (n_samples_X, n_samples_Y)\n\nSee Also\n--------\npaired_distances : Distances between pairs of elements of X and Y.\n\nExamples\n--------\n>>> from sklearn.metrics.pairwise import nan_euclidean_distances\n>>> nan = float(\"NaN\")\n>>> X = [[0, 1], [1, nan]]\n>>> nan_euclidean_distances(X, X) # distance between rows of X\narray([[0. , 1.41421356],\n [1.41421356, 0. ]])\n\n>>> # get distance to origin\n>>> nan_euclidean_distances(X, [[0, 0]])\narray([[1. ],\n [1.41421356]])\n\nReferences\n----------\n* John K. Dixon, \"Pattern Recognition with Partly Missing Data\",\n IEEE Transactions on Systems, Man, and Cybernetics, Volume: 9, Issue:\n 10, pp. 617 - 621, Oct. 1979.\n http://ieeexplore.ieee.org/abstract/document/4310090/" - }, - { - "name": "_euclidean_distances_upcast", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Euclidean distances between X and Y.\n\nAssumes X and Y have float32 dtype.\nAssumes XX and YY have float64 dtype or are None.\n\nX and Y are upcast to float64 by chunks, which size is chosen to limit\nmemory increase by approximately 10% (at least 10MiB)." 
- }, - { - "name": "_argmin_min_reduce", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "pairwise_distances_argmin_min", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array containing points." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array containing points." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Axis along which the argmin and distances are to be computed." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "Metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. If metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays as input and return one value indicating the distance between them. This works for Scipy's metrics, but is less efficient than passing the metric name as a string. Distance matrices are not supported. Valid values for metric are: - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'] - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'] See the documentation for scipy.spatial.distance for details on these metrics." 
- }, - { - "name": "metric_kwargs", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Keyword arguments to pass to specified metric function." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance). The minimal distances are\nalso returned.\n\nThis is mostly equivalent to calling:\n\n (pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis),\n pairwise_distances(X, Y=Y, metric=metric).min(axis=axis))\n\nbut uses much less memory, and is faster for large arrays.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n Array containing points.\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n Array containing points.\n\naxis : int, default=1\n Axis along which the argmin and distances are to be computed.\n\nmetric : str or callable, default='euclidean'\n Metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. 
This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\nmetric_kwargs : dict, default=None\n Keyword arguments to pass to specified metric function.\n\nReturns\n-------\nargmin : ndarray\n Y[argmin[i], :] is the row in Y that is closest to X[i, :].\n\ndistances : ndarray\n distances[i] is the distance between the i-th row in X and the\n argmin[i]-th row in Y.\n\nSee Also\n--------\nsklearn.metrics.pairwise_distances\nsklearn.metrics.pairwise_distances_argmin" - }, - { - "name": "pairwise_distances_argmin", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array containing points." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Arrays containing points." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Axis along which the argmin and distances are to be computed." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "\"euclidean\"", - "limitation": null, - "ignored": false, - "docstring": "Metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. 
If metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays as input and return one value indicating the distance between them. This works for Scipy's metrics, but is less efficient than passing the metric name as a string. Distance matrices are not supported. Valid values for metric are: - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'] - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'] See the documentation for scipy.spatial.distance for details on these metrics." - }, - { - "name": "metric_kwargs", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Keyword arguments to pass to specified metric function." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance).\n\nThis is mostly equivalent to calling:\n\n pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis)\n\nbut uses much less memory, and is faster for large arrays.\n\nThis function works with dense 2D arrays only.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n Array containing points.\n\nY : array-like of shape (n_samples_Y, n_features)\n Arrays containing points.\n\naxis : int, default=1\n Axis along which the argmin and distances are to be computed.\n\nmetric : str or callable, default=\"euclidean\"\n Metric to use for distance computation. 
Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\nmetric_kwargs : dict, default=None\n Keyword arguments to pass to specified metric function.\n\nReturns\n-------\nargmin : numpy.ndarray\n Y[argmin[i], :] is the row in Y that is closest to X[i, :].\n\nSee Also\n--------\nsklearn.metrics.pairwise_distances\nsklearn.metrics.pairwise_distances_argmin_min" - }, - { - "name": "haversine_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Haversine distance between samples in X and Y.\n\nThe Haversine (or great circle) distance is the angular distance between\ntwo points on the surface of a sphere. The first coordinate of each point\nis assumed to be the latitude, the second is the longitude, given\nin radians. 
The dimension of the data must be 2.\n\n.. math::\n D(x, y) = 2\\arcsin[\\sqrt{\\sin^2((x1 - y1) / 2)\n + \\cos(x1)\\cos(y1)\\sin^2((x2 - y2) / 2)}]\n\nParameters\n----------\nX : array-like of shape (n_samples_X, 2)\n\nY : array-like of shape (n_samples_Y, 2), default=None\n\nReturns\n-------\ndistance : ndarray of shape (n_samples_X, n_samples_Y)\n\nNotes\n-----\nAs the Earth is nearly spherical, the haversine formula provides a good\napproximation of the distance between two points of the Earth surface, with\na less than 1% error on average.\n\nExamples\n--------\nWe want to calculate the distance between the Ezeiza Airport\n(Buenos Aires, Argentina) and the Charles de Gaulle Airport (Paris,\nFrance).\n\n>>> from sklearn.metrics.pairwise import haversine_distances\n>>> from math import radians\n>>> bsas = [-34.83333, -58.5166646]\n>>> paris = [49.0083899664, 2.53844117956]\n>>> bsas_in_radians = [radians(_) for _ in bsas]\n>>> paris_in_radians = [radians(_) for _ in paris]\n>>> result = haversine_distances([bsas_in_radians, paris_in_radians])\n>>> result * 6371000/1000 # multiply by Earth radius to get kilometers\narray([[ 0. , 11099.54035582],\n [11099.54035582, 0. ]])" - }, - { - "name": "manhattan_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "sum_over_features", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True the function returns the pairwise distance matrix else it returns the componentwise L1 pairwise-distances. Not supported for sparse matrix inputs." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the L1 distances between the vectors in X and Y.\n\nWith sum_over_features equal to False it returns the componentwise\ndistances.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n\nY : array-like of shape (n_samples_Y, n_features), default=None\n\nsum_over_features : bool, default=True\n If True the function returns the pairwise distance matrix\n else it returns the componentwise L1 pairwise-distances.\n Not supported for sparse matrix inputs.\n\nReturns\n-------\nD : ndarray of shape (n_samples_X * n_samples_Y, n_features) or (n_samples_X, n_samples_Y)\n If sum_over_features is False shape is\n (n_samples_X * n_samples_Y, n_features) and D contains the\n componentwise L1 pairwise-distances (ie. absolute difference),\n else shape is (n_samples_X, n_samples_Y) and D contains\n the pairwise L1 distances.\n\nNotes\n--------\nWhen X and/or Y are CSR sparse matrices and they are not already\nin canonical format, this function modifies them in-place to\nmake them canonical.\n\nExamples\n--------\n>>> from sklearn.metrics.pairwise import manhattan_distances\n>>> manhattan_distances([[3]], [[3]])\narray([[0.]])\n>>> manhattan_distances([[3]], [[2]])\narray([[1.]])\n>>> manhattan_distances([[2]], [[3]])\narray([[1.]])\n>>> manhattan_distances([[1, 2], [3, 4]], [[1, 2], [0, 3]])\narray([[0., 2.],\n [4., 4.]])\n>>> import numpy as np\n>>> X = np.ones((1, 2))\n>>> y = np.full((2, 2), 2.)\n>>> manhattan_distances(X, y, sum_over_features=False)\narray([[1., 1.],\n [1., 1.]])" - }, - { - "name": "cosine_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix `X`." 
- }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix `Y`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute cosine distance between samples in X and Y.\n\nCosine distance is defined as 1.0 minus the cosine similarity.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n Matrix `X`.\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features), default=None\n Matrix `Y`.\n\nReturns\n-------\ndistance matrix : ndarray of shape (n_samples_X, n_samples_Y)\n\nSee Also\n--------\ncosine_similarity\nscipy.spatial.distance.cosine : Dense matrices only." - }, - { - "name": "paired_euclidean_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the paired euclidean distances between X and Y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nY : array-like of shape (n_samples, n_features)\n\nReturns\n-------\ndistances : ndarray of shape (n_samples,)" - }, - { - "name": "paired_manhattan_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the L1 
distances between the vectors in X and Y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nY : array-like of shape (n_samples, n_features)\n\nReturns\n-------\ndistances : ndarray of shape (n_samples,)" - }, - { - "name": "paired_cosine_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the paired cosine distances between X and Y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nY : array-like of shape (n_samples, n_features)\n\nReturns\n-------\ndistances : ndarray of shape (n_samples,)\n\nNotes\n-----\nThe cosine distance is equivalent to the half the squared\neuclidean distance if each sample is normalized to unit norm." - }, - { - "name": "paired_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array 1 for distance computation." - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array 2 for distance computation." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "\"euclidean\"", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options specified in PAIRED_DISTANCES, including \"euclidean\", \"manhattan\", or \"cosine\". 
Alternatively, if metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays from X as input and return a value indicating the distance between them." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the paired distances between X and Y.\n\nComputes the distances between (X[0], Y[0]), (X[1], Y[1]), etc...\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Array 1 for distance computation.\n\nY : ndarray of shape (n_samples, n_features)\n Array 2 for distance computation.\n\nmetric : str or callable, default=\"euclidean\"\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n specified in PAIRED_DISTANCES, including \"euclidean\",\n \"manhattan\", or \"cosine\".\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. 
The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\nReturns\n-------\ndistances : ndarray of shape (n_samples,)\n\nSee Also\n--------\npairwise_distances : Computes the distance between every pair of samples.\n\nExamples\n--------\n>>> from sklearn.metrics.pairwise import paired_distances\n>>> X = [[0, 1], [1, 1]]\n>>> Y = [[0, 1], [2, 1]]\n>>> paired_distances(X, Y)\narray([0., 1.])" - }, - { - "name": "linear_kernel", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "dense_output", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to return dense output even when the input is sparse. If ``False``, the output is sparse if both input arrays are sparse. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the linear kernel between X and Y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ndense_output : bool, default=True\n Whether to return dense output even when the input is sparse. If\n ``False``, the output is sparse if both input arrays are sparse.\n\n .. 
versionadded:: 0.20\n\nReturns\n-------\nGram matrix : ndarray of shape (n_samples_X, n_samples_Y)" - }, - { - "name": "polynomial_kernel", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If None, defaults to 1.0 / n_features." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the polynomial kernel between X and Y::\n\n K(X, Y) = (gamma + coef0)^degree\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ndegree : int, default=3\n\ngamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\ncoef0 : float, default=1\n\nReturns\n-------\nGram matrix : ndarray of shape (n_samples_X, n_samples_Y)" - }, - { - "name": "sigmoid_kernel", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - 
"docstring": "If None, defaults to 1.0 / n_features." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the sigmoid kernel between X and Y::\n\n K(X, Y) = tanh(gamma + coef0)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ngamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\ncoef0 : float, default=1\n\nReturns\n-------\nGram matrix : ndarray of shape (n_samples_X, n_samples_Y)" - }, - { - "name": "rbf_kernel", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If None, defaults to 1.0 / n_features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the rbf (gaussian) kernel between X and Y::\n\n K(x, y) = exp(-gamma ||x-y||^2)\n\nfor each pair of rows x in X and y in Y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ngamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\nReturns\n-------\nkernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)" - }, - { - "name": "laplacian_kernel", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If None, defaults to 1.0 / n_features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the laplacian kernel between X and Y.\n\nThe laplacian kernel is defined as::\n\n K(x, y) = exp(-gamma ||x-y||_1)\n\nfor each pair of rows x in X and y in Y.\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ngamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\nReturns\n-------\nkernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)" - }, - { - "name": "cosine_similarity", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." 
- }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. If ``None``, the output will be the pairwise similarities between all samples in ``X``." - }, - { - "name": "dense_output", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to return dense output even when the input is sparse. If ``False``, the output is sparse if both input arrays are sparse. .. versionadded:: 0.17 parameter ``dense_output`` for dense output." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute cosine similarity between samples in X and Y.\n\nCosine similarity, or the cosine kernel, computes similarity as the\nnormalized dot product of X and Y:\n\n K(X, Y) = / (||X||*||Y||)\n\nOn L2-normalized data, this function is equivalent to linear_kernel.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples_X, n_features)\n Input data.\n\nY : {ndarray, sparse matrix} of shape (n_samples_Y, n_features), default=None\n Input data. If ``None``, the output will be the pairwise\n similarities between all samples in ``X``.\n\ndense_output : bool, default=True\n Whether to return dense output even when the input is sparse. If\n ``False``, the output is sparse if both input arrays are sparse.\n\n .. 
versionadded:: 0.17\n parameter ``dense_output`` for dense output.\n\nReturns\n-------\nkernel matrix : ndarray of shape (n_samples_X, n_samples_Y)" - }, - { - "name": "additive_chi2_kernel", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the additive chi-squared kernel between observations in X and\nY.\n\nThe chi-squared kernel is computed between each pair of rows in X and Y. X\nand Y have to be non-negative. This kernel is most commonly applied to\nhistograms.\n\nThe chi-squared kernel is given by::\n\n k(x, y) = -Sum [(x - y)^2 / (x + y)]\n\nIt can be interpreted as a weighted difference per entry.\n\nRead more in the :ref:`User Guide `.\n\nNotes\n-----\nAs the negative of a distance, this kernel is only conditionally positive\ndefinite.\n\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\nReturns\n-------\nkernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)\n\nSee Also\n--------\nchi2_kernel : The exponentiated version of the kernel, which is usually\n preferable.\nsklearn.kernel_approximation.AdditiveChi2Sampler : A Fourier approximation\n to this kernel.\n\nReferences\n----------\n* Zhang, J. and Marszalek, M. and Lazebnik, S. 
and Schmid, C.\n Local features and kernels for classification of texture and object\n categories: A comprehensive study\n International Journal of Computer Vision 2007\n https://research.microsoft.com/en-us/um/people/manik/projects/trade-off/papers/ZhangIJCV06.pdf" - }, - { - "name": "chi2_kernel", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Scaling parameter of the chi2 kernel." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the exponential chi-squared kernel X and Y.\n\nThe chi-squared kernel is computed between each pair of rows in X and Y. X\nand Y have to be non-negative. This kernel is most commonly applied to\nhistograms.\n\nThe chi-squared kernel is given by::\n\n k(x, y) = exp(-gamma Sum [(x - y)^2 / (x + y)])\n\nIt can be interpreted as a weighted difference per entry.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ngamma : float, default=1.\n Scaling parameter of the chi2 kernel.\n\nReturns\n-------\nkernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)\n\nSee Also\n--------\nadditive_chi2_kernel : The additive version of this kernel.\nsklearn.kernel_approximation.AdditiveChi2Sampler : A Fourier approximation\n to the additive version of this kernel.\n\nReferences\n----------\n* Zhang, J. and Marszalek, M. and Lazebnik, S. 
and Schmid, C.\n Local features and kernels for classification of texture and object\n categories: A comprehensive study\n International Journal of Computer Vision 2007\n https://research.microsoft.com/en-us/um/people/manik/projects/trade-off/papers/ZhangIJCV06.pdf" - }, - { - "name": "distance_metrics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Valid metrics for pairwise_distances.\n\nThis function simply returns the valid pairwise distance metrics.\nIt exists to allow for a description of the mapping for\neach of the valid strings.\n\nThe valid distance metrics, and the function they map to, are:\n\n=============== ========================================\nmetric Function\n=============== ========================================\n'cityblock' metrics.pairwise.manhattan_distances\n'cosine' metrics.pairwise.cosine_distances\n'euclidean' metrics.pairwise.euclidean_distances\n'haversine' metrics.pairwise.haversine_distances\n'l1' metrics.pairwise.manhattan_distances\n'l2' metrics.pairwise.euclidean_distances\n'manhattan' metrics.pairwise.manhattan_distances\n'nan_euclidean' metrics.pairwise.nan_euclidean_distances\n=============== ========================================\n\nRead more in the :ref:`User Guide `." - }, - { - "name": "_dist_wrapper", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Write in-place to a slice of a distance matrix." - }, - { - "name": "_parallel_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Break the pairwise matrix in n_jobs even slices\nand compute them in parallel." 
- }, - { - "name": "_pairwise_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Handle the callable case for pairwise_{distances,kernels}.\n " - }, - { - "name": "_check_chunk_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Checks chunk is a sequence of expected size or a tuple of same.\n " - }, - { - "name": "_precompute_metric_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Precompute data-derived metric parameters if not provided.\n " - }, - { - "name": "pairwise_distances_chunked", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of pairwise distances between samples, or a feature array. The shape the array should be (n_samples_X, n_samples_X) if metric='precomputed' and (n_samples_X, n_features) otherwise." - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An optional second feature array. Only allowed if metric != \"precomputed\"." - }, - { - "name": "reduce_func", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The function which is applied on each chunk of the distance matrix, reducing it to needed values. ``reduce_func(D_chunk, start)`` is called repeatedly, where ``D_chunk`` is a contiguous vertical slice of the pairwise distance matrix, starting at row ``start``. It should return one of: None; an array, a list, or a sparse matrix of length ``D_chunk.shape[0]``; or a tuple of such objects. Returning None is useful for in-place operations, rather than reductions. If None, pairwise_distances_chunked returns a generator of vertical chunks of the distance matrix." 
- }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options allowed by scipy.spatial.distance.pdist for its metric parameter, or a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS. If metric is \"precomputed\", X is assumed to be a distance matrix. Alternatively, if metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays from X as input and return a value indicating the distance between them." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "working_memory", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sought maximum memory for temporary distance matrix chunks. When None (default), the value of ``sklearn.get_config()['working_memory']`` is used." - }, - { - "name": "`**kwds`", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Any further parameters are passed directly to the distance function. If using a scipy.spatial.distance metric, the parameters are still metric dependent. See the scipy docs for usage examples." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a distance matrix chunk by chunk with optional reduction.\n\nIn cases where not all of a pairwise distance matrix needs to be stored at\nonce, this is used to calculate pairwise distances in\n``working_memory``-sized chunks. If ``reduce_func`` is given, it is run\non each chunk and its return values are concatenated into lists, arrays\nor sparse matrices.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_features)\n Array of pairwise distances between samples, or a feature array.\n The shape the array should be (n_samples_X, n_samples_X) if\n metric='precomputed' and (n_samples_X, n_features) otherwise.\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n An optional second feature array. Only allowed if\n metric != \"precomputed\".\n\nreduce_func : callable, default=None\n The function which is applied on each chunk of the distance matrix,\n reducing it to needed values. ``reduce_func(D_chunk, start)``\n is called repeatedly, where ``D_chunk`` is a contiguous vertical\n slice of the pairwise distance matrix, starting at row ``start``.\n It should return one of: None; an array, a list, or a sparse matrix\n of length ``D_chunk.shape[0]``; or a tuple of such objects. Returning\n None is useful for in-place operations, rather than reductions.\n\n If None, pairwise_distances_chunked returns a generator of vertical\n chunks of the distance matrix.\n\nmetric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. 
If metric is a string, it must be one of the options\n allowed by scipy.spatial.distance.pdist for its metric parameter, or\n a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a distance matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nworking_memory : int, default=None\n The sought maximum memory for temporary distance matrix chunks.\n When None (default), the value of\n ``sklearn.get_config()['working_memory']`` is used.\n\n`**kwds` : optional keyword parameters\n Any further parameters are passed directly to the distance function.\n If using a scipy.spatial.distance metric, the parameters are still\n metric dependent. See the scipy docs for usage examples.\n\nYields\n------\nD_chunk : {ndarray, sparse matrix}\n A contiguous slice of distance matrix, optionally processed by\n ``reduce_func``.\n\nExamples\n--------\nWithout reduce_func:\n\n>>> import numpy as np\n>>> from sklearn.metrics import pairwise_distances_chunked\n>>> X = np.random.RandomState(0).rand(5, 3)\n>>> D_chunk = next(pairwise_distances_chunked(X))\n>>> D_chunk\narray([[0. ..., 0.29..., 0.41..., 0.19..., 0.57...],\n [0.29..., 0. ..., 0.57..., 0.41..., 0.76...],\n [0.41..., 0.57..., 0. ..., 0.44..., 0.90...],\n [0.19..., 0.41..., 0.44..., 0. ..., 0.51...],\n [0.57..., 0.76..., 0.90..., 0.51..., 0. 
...]])\n\nRetrieve all neighbors and average distance within radius r:\n\n>>> r = .2\n>>> def reduce_func(D_chunk, start):\n... neigh = [np.flatnonzero(d < r) for d in D_chunk]\n... avg_dist = (D_chunk * (D_chunk < r)).mean(axis=1)\n... return neigh, avg_dist\n>>> gen = pairwise_distances_chunked(X, reduce_func=reduce_func)\n>>> neigh, avg_dist = next(gen)\n>>> neigh\n[array([0, 3]), array([1]), array([2]), array([0, 3]), array([4])]\n>>> avg_dist\narray([0.039..., 0. , 0. , 0.039..., 0. ])\n\nWhere r is defined per sample, we need to make use of ``start``:\n\n>>> r = [.2, .4, .4, .3, .1]\n>>> def reduce_func(D_chunk, start):\n... neigh = [np.flatnonzero(d < r[i])\n... for i, d in enumerate(D_chunk, start)]\n... return neigh\n>>> neigh = next(pairwise_distances_chunked(X, reduce_func=reduce_func))\n>>> neigh\n[array([0, 3]), array([0, 1]), array([2]), array([0, 3]), array([4])]\n\nForce row-by-row generation by reducing ``working_memory``:\n\n>>> gen = pairwise_distances_chunked(X, reduce_func=reduce_func,\n... working_memory=0)\n>>> next(gen)\n[array([0, 3])]\n>>> next(gen)\n[array([0, 1])]" - }, - { - "name": "pairwise_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of pairwise distances between samples, or a feature array. The shape of the array should be (n_samples_X, n_samples_X) if metric == \"precomputed\" and (n_samples_X, n_features) otherwise." - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An optional second feature array. Only allowed if metric != \"precomputed\"." 
- }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options allowed by scipy.spatial.distance.pdist for its metric parameter, or a metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``. If metric is \"precomputed\", X is assumed to be a distance matrix. Alternatively, if metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays from X as input and return a value indicating the distance between them." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "force_all_finite", - "type": "Union[Literal['allow-nan'], bool]", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise an error on np.inf, np.nan, pd.NA in array. Ignored for a metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``. The possibilities are: - True: Force all values of array to be finite. - False: accepts np.inf, np.nan, pd.NA in array. - 'allow-nan': accepts only np.nan and pd.NA values in array. Values cannot be infinite. .. versionadded:: 0.22 ``force_all_finite`` accepts the string ``'allow-nan'``. .. versionchanged:: 0.23 Accepts `pd.NA` and converts it into `np.nan`." 
- }, - { - "name": "**kwds", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Any further parameters are passed directly to the distance function. If using a scipy.spatial.distance metric, the parameters are still metric dependent. See the scipy docs for usage examples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the distance matrix from a vector array X and optional Y.\n\nThis method takes either a vector array or a distance matrix, and returns\na distance matrix. If the input is a vector array, the distances are\ncomputed. If the input is a distances matrix, it is returned instead.\n\nThis method provides a safe way to take a distance matrix as input, while\npreserving compatibility with many other algorithms that take a vector\narray.\n\nIf Y is given (default is None), then the returned matrix is the pairwise\ndistance between the arrays from both X and Y.\n\nValid values for metric are:\n\n- From scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']. These metrics support sparse matrix\n inputs.\n ['nan_euclidean'] but it does not yet support sparse matrices.\n\n- From scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',\n 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',\n 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']\n See the documentation for scipy.spatial.distance for details on these\n metrics. These metrics do not support sparse matrix inputs.\n\nNote that in the case of 'cityblock', 'cosine' and 'euclidean' (which are\nvalid scipy.spatial.distance metrics), the scikit-learn implementation\nwill be used, which is faster and has support for sparse matrices (except\nfor 'cityblock'). 
For a verbose description of the metrics from\nscikit-learn, see the __doc__ of the sklearn.pairwise.distance_metrics\nfunction.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_features)\n Array of pairwise distances between samples, or a feature array.\n The shape of the array should be (n_samples_X, n_samples_X) if\n metric == \"precomputed\" and (n_samples_X, n_features) otherwise.\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n An optional second feature array. Only allowed if\n metric != \"precomputed\".\n\nmetric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by scipy.spatial.distance.pdist for its metric parameter, or\n a metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``.\n If metric is \"precomputed\", X is assumed to be a distance matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nforce_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in array. Ignored\n for a metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``. The\n possibilities are:\n\n - True: Force all values of array to be finite.\n - False: accepts np.inf, np.nan, pd.NA in array.\n - 'allow-nan': accepts only np.nan and pd.NA values in array. 
Values\n cannot be infinite.\n\n .. versionadded:: 0.22\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`.\n\n**kwds : optional keyword parameters\n Any further parameters are passed directly to the distance function.\n If using a scipy.spatial.distance metric, the parameters are still\n metric dependent. See the scipy docs for usage examples.\n\nReturns\n-------\nD : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_samples_Y)\n A distance matrix D such that D_{i, j} is the distance between the\n ith and jth vectors of the given matrix X, if Y is None.\n If Y is not None, then D_{i, j} is the distance between the ith array\n from X and the jth array from Y.\n\nSee Also\n--------\npairwise_distances_chunked : Performs the same calculation as this\n function, but returns a generator of chunks of the distance matrix, in\n order to limit memory usage.\npaired_distances : Computes the distances between corresponding elements\n of two arrays." 
- }, - { - "name": "kernel_metrics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Valid metrics for pairwise_kernels.\n\nThis function simply returns the valid pairwise distance metrics.\nIt exists, however, to allow for a verbose description of the mapping for\neach of the valid strings.\n\nThe valid distance metrics, and the function they map to, are:\n =============== ========================================\n metric Function\n =============== ========================================\n 'additive_chi2' sklearn.pairwise.additive_chi2_kernel\n 'chi2' sklearn.pairwise.chi2_kernel\n 'linear' sklearn.pairwise.linear_kernel\n 'poly' sklearn.pairwise.polynomial_kernel\n 'polynomial' sklearn.pairwise.polynomial_kernel\n 'rbf' sklearn.pairwise.rbf_kernel\n 'laplacian' sklearn.pairwise.laplacian_kernel\n 'sigmoid' sklearn.pairwise.sigmoid_kernel\n 'cosine' sklearn.pairwise.cosine_similarity\n =============== ========================================\n\nRead more in the :ref:`User Guide `." - }, - { - "name": "pairwise_kernels", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of pairwise kernels between samples, or a feature array. The shape of the array should be (n_samples_X, n_samples_X) if metric == \"precomputed\" and (n_samples_X, n_features) otherwise." - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A second feature array only if X has shape (n_samples_X, n_features)." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "\"linear\"", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating kernel between instances in a feature array. 
If metric is a string, it must be one of the metrics in pairwise.PAIRWISE_KERNEL_FUNCTIONS. If metric is \"precomputed\", X is assumed to be a kernel matrix. Alternatively, if metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two rows from X as input and return the corresponding kernel value as a single number. This means that callables from :mod:`sklearn.metrics.pairwise` are not allowed, as they operate on matrices, not single samples. Use the string identifying the kernel instead." - }, - { - "name": "filter_params", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to filter invalid parameters or not." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "**kwds", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Any further parameters are passed directly to the kernel function." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the kernel between arrays X and optional array Y.\n\nThis method takes either a vector array or a kernel matrix, and returns\na kernel matrix. If the input is a vector array, the kernels are\ncomputed. 
If the input is a kernel matrix, it is returned instead.\n\nThis method provides a safe way to take a kernel matrix as input, while\npreserving compatibility with many other algorithms that take a vector\narray.\n\nIf Y is given (default is None), then the returned matrix is the pairwise\nkernel between the arrays from both X and Y.\n\nValid values for metric are:\n ['additive_chi2', 'chi2', 'linear', 'poly', 'polynomial', 'rbf',\n 'laplacian', 'sigmoid', 'cosine']\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_features)\n Array of pairwise kernels between samples, or a feature array.\n The shape of the array should be (n_samples_X, n_samples_X) if\n metric == \"precomputed\" and (n_samples_X, n_features) otherwise.\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n A second feature array only if X has shape (n_samples_X, n_features).\n\nmetric : str or callable, default=\"linear\"\n The metric to use when calculating kernel between instances in a\n feature array. If metric is a string, it must be one of the metrics\n in pairwise.PAIRWISE_KERNEL_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a kernel matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two rows from X as input and return the corresponding\n kernel value as a single number. This means that callables from\n :mod:`sklearn.metrics.pairwise` are not allowed, as they operate on\n matrices, not single samples. Use the string identifying the kernel\n instead.\n\nfilter_params : bool, default=False\n Whether to filter invalid parameters or not.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. 
This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n**kwds : optional keyword parameters\n Any further parameters are passed directly to the kernel function.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_samples_Y)\n A kernel matrix K such that K_{i, j} is the kernel between the\n ith and jth vectors of the given matrix X, if Y is None.\n If Y is not None, then K_{i, j} is the kernel between the ith array\n from X and the jth array from Y.\n\nNotes\n-----\nIf metric is 'precomputed', Y is ignored and X is returned." - } - ] - }, - { - "name": "sklearn.metrics.setup", - "imports": [ - "import os", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics._base", - "imports": [ - "from itertools import combinations", - "import numpy as np", - "from utils import check_array", - "from utils import check_consistent_length", - "from utils.multiclass import type_of_target" - ], - "classes": [], - "functions": [ - { - "name": "_average_binary_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True binary labels in binary label indicators." - }, - { - "name": "y_score", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates of the positive class, confidence values, or binary decisions." 
- }, - { - "name": "average", - "type": "str", - "hasDefault": true, - "default": "'macro'", - "limitation": null, - "ignored": false, - "docstring": "If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'micro'``: Calculate metrics globally by considering each element of the label indicator matrix as a label. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). ``'samples'``: Calculate metrics for each instance, and find their average. Will be ignored when ``y_true`` is binary." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "binary_metric", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The binary metric function to use." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Average a binary metric for multilabel classification.\n\nParameters\n----------\ny_true : array, shape = [n_samples] or [n_samples, n_classes]\n True binary labels in binary label indicators.\n\ny_score : array, shape = [n_samples] or [n_samples, n_classes]\n Target scores, can either be probability estimates of the positive\n class, confidence values, or binary decisions.\n\naverage : string, [None, 'micro', 'macro' (default), 'samples', 'weighted']\n If ``None``, the scores for each class are returned. 
Otherwise,\n this determines the type of averaging performed on the data:\n\n ``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n ``'samples'``:\n Calculate metrics for each instance, and find their average.\n\n Will be ignored when ``y_true`` is binary.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nbinary_metric : callable, returns shape [n_classes]\n The binary metric function to use.\n\nReturns\n-------\nscore : float or array of shape [n_classes]\n If not ``None``, average the score, else return the score for each\n classes." - }, - { - "name": "_average_multiclass_ovo_score", - "decorators": [], - "parameters": [ - { - "name": "binary_metric", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The binary metric function to use that accepts the following as input: y_true_target : array, shape = [n_samples_target] Some sub-array of y_true for a pair of classes designated positive and negative in the one-vs-one scheme. y_score_target : array, shape = [n_samples_target] Scores corresponding to the probability estimates of a sample belonging to the designated positive class label" - }, - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True multiclass labels." - }, - { - "name": "y_score", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores corresponding to probability estimates of a sample belonging to a particular class." 
- }, - { - "name": "average", - "type": "Literal['macro', 'weighted']", - "hasDefault": true, - "default": "'macro'", - "limitation": null, - "ignored": false, - "docstring": "Determines the type of averaging performed on the pairwise binary metric scores: ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. Classes are assumed to be uniformly distributed. ``'weighted'``: Calculate metrics for each label, taking into account the prevalence of the classes." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Average one-versus-one scores for multiclass classification.\n\nUses the binary metric for one-vs-one multiclass classification,\nwhere the score is computed according to the Hand & Till (2001) algorithm.\n\nParameters\n----------\nbinary_metric : callable\n The binary metric function to use that accepts the following as input:\n y_true_target : array, shape = [n_samples_target]\n Some sub-array of y_true for a pair of classes designated\n positive and negative in the one-vs-one scheme.\n y_score_target : array, shape = [n_samples_target]\n Scores corresponding to the probability estimates\n of a sample belonging to the designated positive class label\n\ny_true : array-like of shape (n_samples,)\n True multiclass labels.\n\ny_score : array-like of shape (n_samples, n_classes)\n Target scores corresponding to probability estimates of a sample\n belonging to a particular class.\n\naverage : {'macro', 'weighted'}, default='macro'\n Determines the type of averaging performed on the pairwise binary\n metric scores:\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account. 
Classes\n are assumed to be uniformly distributed.\n ``'weighted'``:\n Calculate metrics for each label, taking into account the\n prevalence of the classes.\n\nReturns\n-------\nscore : float\n Average of the pairwise binary metric scores." - }, - { - "name": "_check_pos_label_consistency", - "decorators": [], - "parameters": [ - { - "name": "pos_label", - "type": "Optional[Union[str, int]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The positive label." - }, - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target vector." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if `pos_label` need to be specified or not.\n\nIn binary classification, we fix `pos_label=1` if the labels are in the set\n{-1, 1} or {0, 1}. Otherwise, we raise an error asking to specify the\n`pos_label` parameters.\n\nParameters\n----------\npos_label : int, str or None\n The positive label.\ny_true : ndarray of shape (n_samples,)\n The target vector.\n\nReturns\n-------\npos_label : int\n If `pos_label` can be inferred, it will be returned.\n\nRaises\n------\nValueError\n In the case that `y_true` does not have label in {-1, 1} or {0, 1},\n it will raise a `ValueError`." 
- } - ] - }, - { - "name": "sklearn.metrics._classification", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy.sparse import coo_matrix", - "from scipy.sparse import csr_matrix", - "from preprocessing import LabelBinarizer", - "from preprocessing import LabelEncoder", - "from utils import assert_all_finite", - "from utils import check_array", - "from utils import check_consistent_length", - "from utils import column_or_1d", - "from utils.multiclass import unique_labels", - "from utils.multiclass import type_of_target", - "from utils.validation import _num_samples", - "from utils.validation import _deprecate_positional_args", - "from utils.sparsefuncs import count_nonzero", - "from exceptions import UndefinedMetricWarning", - "from _base import _check_pos_label_consistency" - ], - "classes": [], - "functions": [ - { - "name": "_check_zero_division", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_targets", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that y_true and y_pred belong to the same classification task.\n\nThis converts multiclass or binary types to a common shape, and raises a\nValueError for a mix of multilabel and multiclass targets, a mix of\nmultilabel formats, for the presence of continuous-valued or multioutput\ntargets, or for targets of different lengths.\n\nColumn vectors are squeezed to 1d, while multilabel formats are returned\nas CSR sparse label indicators.\n\nParameters\n----------\ny_true : array-like\n\ny_pred : array-like\n\nReturns\n-------\ntype_true : 
one of {'multilabel-indicator', 'multiclass', 'binary'}\n The type of the true target data, as output by\n ``utils.multiclass.type_of_target``.\n\ny_true : array or indicator matrix\n\ny_pred : array or indicator matrix" - }, - { - "name": "_weighted_sum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "accuracy_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) labels." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted labels, as returned by a classifier." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, return the number of correctly classified samples. Otherwise, return the fraction of correctly classified samples." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Accuracy classification score.\n\nIn multilabel classification, this function computes subset accuracy:\nthe set of labels predicted for a sample must *exactly* match the\ncorresponding set of labels in y_true.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) labels.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Predicted labels, as returned by a classifier.\n\nnormalize : bool, default=True\n If ``False``, return the number of correctly classified samples.\n Otherwise, return the fraction of correctly classified samples.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n If ``normalize == True``, return the fraction of correctly\n classified samples (float), else returns the number of correctly\n classified samples (int).\n\n The best performance is 1 with ``normalize == True`` and the number\n of samples with ``normalize == False``.\n\nSee Also\n--------\njaccard_score, hamming_loss, zero_one_loss\n\nNotes\n-----\nIn binary and multiclass classification, this function is equal\nto the ``jaccard_score`` function.\n\nExamples\n--------\n>>> from sklearn.metrics import accuracy_score\n>>> y_pred = [0, 2, 1, 3]\n>>> y_true = [0, 1, 2, 3]\n>>> accuracy_score(y_true, y_pred)\n0.5\n>>> accuracy_score(y_true, y_pred, normalize=False)\n2\n\nIn the multilabel case with binary label indicators:\n\n>>> import numpy as np\n>>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))\n0.5" - }, - { - "name": "confusion_matrix", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." 
- }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of labels to index the matrix. This may be used to reorder or select a subset of labels. If ``None`` is given, those that appear at least once in ``y_true`` or ``y_pred`` are used in sorted order." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. .. versionadded:: 0.18" - }, - { - "name": "normalize", - "type": "Literal['true', 'pred', 'all']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Normalizes confusion matrix over the true (rows), predicted (columns) conditions or all the population. If None, confusion matrix will not be normalized." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute confusion matrix to evaluate the accuracy of a classification.\n\nBy definition a confusion matrix :math:`C` is such that :math:`C_{i, j}`\nis equal to the number of observations known to be in group :math:`i` and\npredicted to be in group :math:`j`.\n\nThus in binary classification, the count of true negatives is\n:math:`C_{0,0}`, false negatives is :math:`C_{1,0}`, true positives is\n:math:`C_{1,1}` and false positives is :math:`C_{0,1}`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,)\n Estimated targets as returned by a classifier.\n\nlabels : array-like of shape (n_classes), default=None\n List of labels to index the matrix. 
This may be used to reorder\n or select a subset of labels.\n If ``None`` is given, those that appear at least once\n in ``y_true`` or ``y_pred`` are used in sorted order.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. versionadded:: 0.18\n\nnormalize : {'true', 'pred', 'all'}, default=None\n Normalizes confusion matrix over the true (rows), predicted (columns)\n conditions or all the population. If None, confusion matrix will not be\n normalized.\n\nReturns\n-------\nC : ndarray of shape (n_classes, n_classes)\n Confusion matrix whose i-th row and j-th\n column entry indicates the number of\n samples with true label being i-th class\n and predicted label being j-th class.\n\nSee Also\n--------\nplot_confusion_matrix : Plot Confusion Matrix.\nConfusionMatrixDisplay : Confusion Matrix visualization.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Confusion matrix\n `_\n (Wikipedia and other references may use a different\n convention for axes).\n\nExamples\n--------\n>>> from sklearn.metrics import confusion_matrix\n>>> y_true = [2, 0, 2, 2, 0, 1]\n>>> y_pred = [0, 0, 2, 2, 0, 2]\n>>> confusion_matrix(y_true, y_pred)\narray([[2, 0, 0],\n [0, 0, 1],\n [1, 0, 2]])\n\n>>> y_true = [\"cat\", \"ant\", \"cat\", \"cat\", \"ant\", \"bird\"]\n>>> y_pred = [\"ant\", \"ant\", \"cat\", \"cat\", \"ant\", \"cat\"]\n>>> confusion_matrix(y_true, y_pred, labels=[\"ant\", \"bird\", \"cat\"])\narray([[2, 0, 0],\n [0, 0, 1],\n [1, 0, 2]])\n\nIn the binary case, we can extract true positives, etc as follows:\n\n>>> tn, fp, fn, tp = confusion_matrix([0, 1, 0, 1], [1, 1, 1, 0]).ravel()\n>>> (tn, fp, fn, tp)\n(0, 2, 1, 1)" - }, - { - "name": "multilabel_confusion_matrix", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." 
- }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A list of classes or column indices to select some (or to force inclusion of classes absent from the data)." - }, - { - "name": "samplewise", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "In the multilabel case, this calculates a confusion matrix per sample." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute a confusion matrix for each class or sample.\n\n.. versionadded:: 0.21\n\nCompute class-wise (default) or sample-wise (samplewise=True) multilabel\nconfusion matrix to evaluate the accuracy of a classification, and output\nconfusion matrices for each class or sample.\n\nIn multilabel confusion matrix :math:`MCM`, the count of true negatives\nis :math:`MCM_{:,0,0}`, false negatives is :math:`MCM_{:,1,0}`,\ntrue positives is :math:`MCM_{:,1,1}` and false positives is\n:math:`MCM_{:,0,1}`.\n\nMulticlass data will be treated as if binarized under a one-vs-rest\ntransformation. 
Returned confusion matrices will be in the order of\nsorted unique labels in the union of (y_true, y_pred).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : {array-like, sparse matrix} of shape (n_samples, n_outputs) or (n_samples,)\n Ground truth (correct) target values.\n\ny_pred : {array-like, sparse matrix} of shape (n_samples, n_outputs) or (n_samples,)\n Estimated targets as returned by a classifier.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nlabels : array-like of shape (n_classes,), default=None\n A list of classes or column indices to select some (or to force\n inclusion of classes absent from the data).\n\nsamplewise : bool, default=False\n In the multilabel case, this calculates a confusion matrix per sample.\n\nReturns\n-------\nmulti_confusion : ndarray of shape (n_outputs, 2, 2)\n A 2x2 confusion matrix corresponding to each output in the input.\n When calculating class-wise multi_confusion (default), then\n n_outputs = n_labels; when calculating sample-wise multi_confusion\n (samplewise=True), n_outputs = n_samples. If ``labels`` is defined,\n the results will be returned in the order specified in ``labels``,\n otherwise the results will be returned in sorted order by default.\n\nSee Also\n--------\nconfusion_matrix\n\nNotes\n-----\nThe multilabel_confusion_matrix calculates class-wise or sample-wise\nmultilabel confusion matrices, and in multiclass tasks, labels are\nbinarized under a one-vs-rest way; while confusion_matrix calculates\none confusion matrix for confusion between every two classes.\n\nExamples\n--------\nMultilabel-indicator case:\n\n>>> import numpy as np\n>>> from sklearn.metrics import multilabel_confusion_matrix\n>>> y_true = np.array([[1, 0, 1],\n... [0, 1, 0]])\n>>> y_pred = np.array([[1, 0, 0],\n... 
[0, 1, 1]])\n>>> multilabel_confusion_matrix(y_true, y_pred)\narray([[[1, 0],\n [0, 1]],\n\n [[1, 0],\n [0, 1]],\n\n [[0, 1],\n [1, 0]]])\n\nMulticlass case:\n\n>>> y_true = [\"cat\", \"ant\", \"cat\", \"cat\", \"ant\", \"bird\"]\n>>> y_pred = [\"ant\", \"ant\", \"cat\", \"cat\", \"ant\", \"cat\"]\n>>> multilabel_confusion_matrix(y_true, y_pred,\n... labels=[\"ant\", \"bird\", \"cat\"])\narray([[[3, 1],\n [0, 2]],\n\n [[5, 0],\n [1, 0]],\n\n [[2, 1],\n [1, 2]]])" - }, - { - "name": "cohen_kappa_score", - "decorators": [], - "parameters": [ - { - "name": "y1", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Labels assigned by the first annotator." - }, - { - "name": "y2", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Labels assigned by the second annotator. The kappa statistic is symmetric, so swapping ``y1`` and ``y2`` doesn't change the value." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of labels to index the matrix. This may be used to select a subset of labels. If None, all labels that appear at least once in ``y1`` or ``y2`` are used." - }, - { - "name": "weights", - "type": "Literal['linear', 'quadratic']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weighting type to calculate the score. None means no weighted; \"linear\" means linear weighted; \"quadratic\" means quadratic weighted." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Cohen's kappa: a statistic that measures inter-annotator agreement.\n\nThis function computes Cohen's kappa [1]_, a score that expresses the level\nof agreement between two annotators on a classification problem. It is\ndefined as\n\n.. math::\n \\kappa = (p_o - p_e) / (1 - p_e)\n\nwhere :math:`p_o` is the empirical probability of agreement on the label\nassigned to any sample (the observed agreement ratio), and :math:`p_e` is\nthe expected agreement when both annotators assign labels randomly.\n:math:`p_e` is estimated using a per-annotator empirical prior over the\nclass labels [2]_.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny1 : array of shape (n_samples,)\n Labels assigned by the first annotator.\n\ny2 : array of shape (n_samples,)\n Labels assigned by the second annotator. The kappa statistic is\n symmetric, so swapping ``y1`` and ``y2`` doesn't change the value.\n\nlabels : array-like of shape (n_classes,), default=None\n List of labels to index the matrix. This may be used to select a\n subset of labels. If None, all labels that appear at least once in\n ``y1`` or ``y2`` are used.\n\nweights : {'linear', 'quadratic'}, default=None\n Weighting type to calculate the score. None means no weighted;\n \"linear\" means linear weighted; \"quadratic\" means quadratic weighted.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nkappa : float\n The kappa statistic, which is a number between -1 and 1. The maximum\n value means complete agreement; zero or lower means chance agreement.\n\nReferences\n----------\n.. [1] J. Cohen (1960). \"A coefficient of agreement for nominal scales\".\n Educational and Psychological Measurement 20(1):37-46.\n doi:10.1177/001316446002000104.\n.. [2] `R. Artstein and M. Poesio (2008). \"Inter-coder agreement for\n computational linguistics\". 
Computational Linguistics 34(4):555-596\n `_.\n.. [3] `Wikipedia entry for the Cohen's kappa\n `_." - }, - { - "name": "jaccard_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) labels." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted labels, as returned by a classifier." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only." - }, - { - "name": "average", - "type": "Optional[Literal['micro', 'macro', 'samples', 'weighted', 'binary']]", - "hasDefault": true, - "default": "'binary'", - "limitation": null, - "ignored": false, - "docstring": "If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'binary'``: Only report results for the class specified by ``pos_label``. 
This is applicable only if targets (``y_{true,pred}``) are binary. ``'micro'``: Calculate metrics globally by counting the total true positives, false negatives and false positives. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance. ``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "zero_division", - "type": "Literal[0.0, 1.0]", - "hasDefault": true, - "default": "\"warn\"", - "limitation": null, - "ignored": false, - "docstring": "Sets the value to return when there is a zero division, i.e. when there there are no negative values in predictions and labels. If set to \"warn\", this acts like 0, but a warning is also raised." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Jaccard similarity coefficient score.\n\nThe Jaccard index [1], or Jaccard similarity coefficient, defined as\nthe size of the intersection divided by the size of the union of two label\nsets, is used to compare set of predicted labels for a sample to the\ncorresponding set of labels in ``y_true``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) labels.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Predicted labels, as returned by a classifier.\n\nlabels : array-like of shape (n_classes,), default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. 
Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {None, 'micro', 'macro', 'samples', 'weighted', 'binary'}, default='binary'\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", {0.0, 1.0}, default=\"warn\"\n Sets the value to return when there is a zero division, i.e. when there\n there are no negative values in predictions and labels. 
If set to\n \"warn\", this acts like 0, but a warning is also raised.\n\nReturns\n-------\nscore : float (if average is not None) or array of floats, shape = [n_unique_labels]\n\nSee Also\n--------\naccuracy_score, f_score, multilabel_confusion_matrix\n\nNotes\n-----\n:func:`jaccard_score` may be a poor metric if there are no\npositives for some samples or classes. Jaccard is undefined if there are\nno true or predicted labels, and our implementation will return a score\nof 0 with a warning.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Jaccard index\n `_.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import jaccard_score\n>>> y_true = np.array([[0, 1, 1],\n... [1, 1, 0]])\n>>> y_pred = np.array([[1, 1, 1],\n... [1, 0, 0]])\n\nIn the binary case:\n\n>>> jaccard_score(y_true[0], y_pred[0])\n0.6666...\n\nIn the multilabel case:\n\n>>> jaccard_score(y_true, y_pred, average='samples')\n0.5833...\n>>> jaccard_score(y_true, y_pred, average='macro')\n0.6666...\n>>> jaccard_score(y_true, y_pred, average=None)\narray([0.5, 0.5, 1. ])\n\nIn the multiclass case:\n\n>>> y_pred = [0, 2, 1, 2]\n>>> y_true = [0, 1, 2, 2]\n>>> jaccard_score(y_true, y_pred, average=None)\narray([1. , 0. , 0.33...])" - }, - { - "name": "matthews_corrcoef", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. .. 
versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Matthews correlation coefficient (MCC).\n\nThe Matthews correlation coefficient is used in machine learning as a\nmeasure of the quality of binary and multiclass classifications. It takes\ninto account true and false positives and negatives and is generally\nregarded as a balanced measure which can be used even if the classes are of\nvery different sizes. The MCC is in essence a correlation coefficient value\nbetween -1 and +1. A coefficient of +1 represents a perfect prediction, 0\nan average random prediction and -1 an inverse prediction. The statistic\nis also known as the phi coefficient. [source: Wikipedia]\n\nBinary and multiclass labels are supported. Only in the binary case does\nthis relate to information about true and false positives and negatives.\nSee references below.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array, shape = [n_samples]\n Ground truth (correct) target values.\n\ny_pred : array, shape = [n_samples]\n Estimated targets as returned by a classifier.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nmcc : float\n The Matthews correlation coefficient (+1 represents a perfect\n prediction, 0 an average random prediction and -1 and inverse\n prediction).\n\nReferences\n----------\n.. [1] `Baldi, Brunak, Chauvin, Andersen and Nielsen, (2000). Assessing the\n accuracy of prediction algorithms for classification: an overview\n `_.\n\n.. [2] `Wikipedia entry for the Matthews Correlation Coefficient\n `_.\n\n.. [3] `Gorodkin, (2004). Comparing two K-category assignments by a\n K-category correlation coefficient\n `_.\n\n.. [4] `Jurman, Riccadonna, Furlanello, (2012). 
A Comparison of MCC and CEN\n Error Measures in MultiClass Prediction\n `_.\n\nExamples\n--------\n>>> from sklearn.metrics import matthews_corrcoef\n>>> y_true = [+1, +1, +1, -1]\n>>> y_pred = [+1, -1, +1, +1]\n>>> matthews_corrcoef(y_true, y_pred)\n-0.33..." - }, - { - "name": "zero_one_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) labels." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted labels, as returned by a classifier." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, return the number of misclassifications. Otherwise, return the fraction of misclassifications." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Zero-one classification loss.\n\nIf normalize is ``True``, return the fraction of misclassifications\n(float), else it returns the number of misclassifications (int). 
The best\nperformance is 0.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) labels.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Predicted labels, as returned by a classifier.\n\nnormalize : bool, default=True\n If ``False``, return the number of misclassifications.\n Otherwise, return the fraction of misclassifications.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nloss : float or int,\n If ``normalize == True``, return the fraction of misclassifications\n (float), else it returns the number of misclassifications (int).\n\nNotes\n-----\nIn multilabel classification, the zero_one_loss function corresponds to\nthe subset zero-one loss: for each sample, the entire set of labels must be\ncorrectly predicted, otherwise the loss for that sample is equal to one.\n\nSee Also\n--------\naccuracy_score, hamming_loss, jaccard_score\n\nExamples\n--------\n>>> from sklearn.metrics import zero_one_loss\n>>> y_pred = [1, 2, 3, 4]\n>>> y_true = [2, 2, 3, 4]\n>>> zero_one_loss(y_true, y_pred)\n0.25\n>>> zero_one_loss(y_true, y_pred, normalize=False)\n1\n\nIn the multilabel case with binary label indicators:\n\n>>> import numpy as np\n>>> zero_one_loss(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))\n0.5" - }, - { - "name": "f1_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." 
- }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order. .. versionchanged:: 0.17 Parameter `labels` improved for multiclass problem." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only." - }, - { - "name": "average", - "type": "Literal['micro', 'macro', 'samples', 'weighted', 'binary']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'binary'``: Only report results for the class specified by ``pos_label``. This is applicable only if targets (``y_{true,pred}``) are binary. ``'micro'``: Calculate metrics globally by counting the total true positives, false negatives and false positives. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). 
This alters 'macro' to account for label imbalance; it can result in an F-score that is not between precision and recall. ``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "zero_division", - "type": "Literal[\"warn\", 0, 1]", - "hasDefault": true, - "default": "\"warn\"", - "limitation": null, - "ignored": false, - "docstring": "Sets the value to return when there is a zero division, i.e. when all predictions and labels are negative. If set to \"warn\", this acts as 0, but warnings are also raised." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the F1 score, also known as balanced F-score or F-measure.\n\nThe F1 score can be interpreted as a weighted average of the precision and\nrecall, where an F1 score reaches its best value at 1 and worst score at 0.\nThe relative contribution of precision and recall to the F1 score are\nequal. The formula for the F1 score is::\n\n F1 = 2 * (precision * recall) / (precision + recall)\n\nIn the multi-class and multi-label case, this is the average of\nthe F1 score of each class with weighting depending on the ``average``\nparameter.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nlabels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. 
Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n .. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {'micro', 'macro', 'samples','weighted', 'binary'} or None, default='binary'\n This parameter is required for multiclass/multilabel targets.\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). 
This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division, i.e. when all\n predictions and labels are negative. If set to \"warn\", this acts as 0,\n but warnings are also raised.\n\nReturns\n-------\nf1_score : float or array of float, shape = [n_unique_labels]\n F1 score of the positive class in binary classification or weighted\n average of the F1 scores of each class for the multiclass task.\n\nSee Also\n--------\nfbeta_score, precision_recall_fscore_support, jaccard_score,\nmultilabel_confusion_matrix\n\nReferences\n----------\n.. [1] `Wikipedia entry for the F1-score\n `_.\n\nExamples\n--------\n>>> from sklearn.metrics import f1_score\n>>> y_true = [0, 1, 2, 0, 1, 2]\n>>> y_pred = [0, 2, 1, 0, 0, 1]\n>>> f1_score(y_true, y_pred, average='macro')\n0.26...\n>>> f1_score(y_true, y_pred, average='micro')\n0.33...\n>>> f1_score(y_true, y_pred, average='weighted')\n0.26...\n>>> f1_score(y_true, y_pred, average=None)\narray([0.8, 0. , 0. ])\n>>> y_true = [0, 0, 0, 0, 0, 0]\n>>> y_pred = [0, 0, 0, 0, 0, 0]\n>>> f1_score(y_true, y_pred, zero_division=1)\n1.0...\n\nNotes\n-----\nWhen ``true positive + false positive == 0``, precision is undefined.\nWhen ``true positive + false negative == 0``, recall is undefined.\nIn such cases, by default the metric will be set to 0, as will f-score,\nand ``UndefinedMetricWarning`` will be raised. This behavior can be\nmodified with ``zero_division``." 
- }, - { - "name": "fbeta_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." - }, - { - "name": "beta", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the weight of recall in the combined score." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order. .. versionchanged:: 0.17 Parameter `labels` improved for multiclass problem." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only." - }, - { - "name": "average", - "type": "Literal['micro', 'macro', 'samples', 'weighted', 'binary']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This parameter is required for multiclass/multilabel targets. 
If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'binary'``: Only report results for the class specified by ``pos_label``. This is applicable only if targets (``y_{true,pred}``) are binary. ``'micro'``: Calculate metrics globally by counting the total true positives, false negatives and false positives. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance; it can result in an F-score that is not between precision and recall. ``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "zero_division", - "type": "Literal[\"warn\", 0, 1]", - "hasDefault": true, - "default": "\"warn\"", - "limitation": null, - "ignored": false, - "docstring": "Sets the value to return when there is a zero division, i.e. when all predictions and labels are negative. If set to \"warn\", this acts as 0, but warnings are also raised." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the F-beta score.\n\nThe F-beta score is the weighted harmonic mean of precision and recall,\nreaching its optimal value at 1 and its worst value at 0.\n\nThe `beta` parameter determines the weight of recall in the combined\nscore. 
``beta < 1`` lends more weight to precision, while ``beta > 1``\nfavors recall (``beta -> 0`` considers only precision, ``beta -> +inf``\nonly recall).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nbeta : float\n Determines the weight of recall in the combined score.\n\nlabels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n .. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {'micro', 'macro', 'samples', 'weighted', 'binary'} or None default='binary'\n This parameter is required for multiclass/multilabel targets.\n If ``None``, the scores for each class are returned. 
Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division, i.e. when all\n predictions and labels are negative. If set to \"warn\", this acts as 0,\n but warnings are also raised.\n\nReturns\n-------\nfbeta_score : float (if average is not None) or array of float, shape = [n_unique_labels]\n F-beta score of the positive class in binary classification or weighted\n average of the F-beta score of each class for the multiclass task.\n\nSee Also\n--------\nprecision_recall_fscore_support, multilabel_confusion_matrix\n\nNotes\n-----\nWhen ``true positive + false positive == 0`` or\n``true positive + false negative == 0``, f-score returns 0 and raises\n``UndefinedMetricWarning``. This behavior can be\nmodified with ``zero_division``.\n\nReferences\n----------\n.. [1] R. Baeza-Yates and B. Ribeiro-Neto (2011).\n Modern Information Retrieval. Addison Wesley, pp. 327-328.\n\n.. 
[2] `Wikipedia entry for the F1-score\n `_.\n\nExamples\n--------\n>>> from sklearn.metrics import fbeta_score\n>>> y_true = [0, 1, 2, 0, 1, 2]\n>>> y_pred = [0, 2, 1, 0, 0, 1]\n>>> fbeta_score(y_true, y_pred, average='macro', beta=0.5)\n0.23...\n>>> fbeta_score(y_true, y_pred, average='micro', beta=0.5)\n0.33...\n>>> fbeta_score(y_true, y_pred, average='weighted', beta=0.5)\n0.23...\n>>> fbeta_score(y_true, y_pred, average=None, beta=0.5)\narray([0.71..., 0. , 0. ])" - }, - { - "name": "_prf_divide", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Performs division and handles divide-by-zero.\n\nOn zero-division, sets the corresponding result elements equal to\n0 or 1 (according to ``zero_division``). Plus, if\n``zero_division != \"warn\"`` raises a warning.\n\nThe metric, modifier and average arguments are used only for determining\nan appropriate warning." - }, - { - "name": "_warn_prf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_set_wise_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validation associated with set-wise metrics.\n\nReturns identified labels." - }, - { - "name": "precision_recall_fscore_support", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." - }, - { - "name": "beta", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The strength of recall versus precision in the F-score." 
- }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only." - }, - { - "name": "average", - "type": "Literal['binary', 'micro', 'macro', 'samples', 'weighted']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'binary'``: Only report results for the class specified by ``pos_label``. This is applicable only if targets (``y_{true,pred}``) are binary. ``'micro'``: Calculate metrics globally by counting the total true positives, false negatives and false positives. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance; it can result in an F-score that is not between precision and recall. 
``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`)." - }, - { - "name": "warn_for", - "type": "Union[Set, Tuple[]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This determines which warnings will be made in the case that this function is being used to return only one of its metrics." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "zero_division", - "type": "Literal[\"warn\", 0, 1]", - "hasDefault": true, - "default": "\"warn\"", - "limitation": null, - "ignored": false, - "docstring": "Sets the value to return when there is a zero division: - recall: when there are no positive labels - precision: when there are no positive predictions - f-score: both If set to \"warn\", this acts as 0, but warnings are also raised." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute precision, recall, F-measure and support for each class.\n\nThe precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\ntrue positives and ``fp`` the number of false positives. The precision is\nintuitively the ability of the classifier not to label as positive a sample\nthat is negative.\n\nThe recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\ntrue positives and ``fn`` the number of false negatives. The recall is\nintuitively the ability of the classifier to find all the positive samples.\n\nThe F-beta score can be interpreted as a weighted harmonic mean of\nthe precision and recall, where an F-beta score reaches its best\nvalue at 1 and worst score at 0.\n\nThe F-beta score weights recall more than precision by a factor of\n``beta``. 
``beta == 1.0`` means recall and precision are equally important.\n\nThe support is the number of occurrences of each class in ``y_true``.\n\nIf ``pos_label is None`` and in binary classification, this function\nreturns the average precision, recall and F-measure if ``average``\nis one of ``'micro'``, ``'macro'``, ``'weighted'`` or ``'samples'``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nbeta : float, default=1.0\n The strength of recall versus precision in the F-score.\n\nlabels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {'binary', 'micro', 'macro', 'samples','weighted'}, default=None\n If ``None``, the scores for each class are returned. 
Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\nwarn_for : tuple or set, for internal use\n This determines which warnings will be made in the case that this\n function is being used to return only one of its metrics.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division:\n - recall: when there are no positive labels\n - precision: when there are no positive predictions\n - f-score: both\n\n If set to \"warn\", this acts as 0, but warnings are also raised.\n\nReturns\n-------\nprecision : float (if average is not None) or array of float, shape = [n_unique_labels]\n\nrecall : float (if average is not None) or array of float, , shape = [n_unique_labels]\n\nfbeta_score : float (if average is not None) or array of float, shape = [n_unique_labels]\n\nsupport : None (if average is not None) or array of int, shape = [n_unique_labels]\n The number of occurrences of each label in ``y_true``.\n\nNotes\n-----\nWhen ``true positive + false positive == 0``, precision is 
undefined.\nWhen ``true positive + false negative == 0``, recall is undefined.\nIn such cases, by default the metric will be set to 0, as will f-score,\nand ``UndefinedMetricWarning`` will be raised. This behavior can be\nmodified with ``zero_division``.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Precision and recall\n `_.\n\n.. [2] `Wikipedia entry for the F1-score\n `_.\n\n.. [3] `Discriminative Methods for Multi-labeled Classification Advances\n in Knowledge Discovery and Data Mining (2004), pp. 22-30 by Shantanu\n Godbole, Sunita Sarawagi\n `_.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import precision_recall_fscore_support\n>>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig'])\n>>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog'])\n>>> precision_recall_fscore_support(y_true, y_pred, average='macro')\n(0.22..., 0.33..., 0.26..., None)\n>>> precision_recall_fscore_support(y_true, y_pred, average='micro')\n(0.33..., 0.33..., 0.33..., None)\n>>> precision_recall_fscore_support(y_true, y_pred, average='weighted')\n(0.22..., 0.33..., 0.26..., None)\n\nIt is possible to compute per-label precisions, recalls, F1-scores and\nsupports instead of averaging:\n\n>>> precision_recall_fscore_support(y_true, y_pred, average=None,\n... labels=['pig', 'dog', 'cat'])\n(array([0. , 0. , 0.66...]),\n array([0., 0., 1.]), array([0. , 0. , 0.8]),\n array([2, 2, 2]))" - }, - { - "name": "precision_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." 
- }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order. .. versionchanged:: 0.17 Parameter `labels` improved for multiclass problem." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only." - }, - { - "name": "average", - "type": "Literal['micro', 'macro', 'samples', 'weighted', 'binary']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'binary'``: Only report results for the class specified by ``pos_label``. This is applicable only if targets (``y_{true,pred}``) are binary. ``'micro'``: Calculate metrics globally by counting the total true positives, false negatives and false positives. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). 
This alters 'macro' to account for label imbalance; it can result in an F-score that is not between precision and recall. ``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "zero_division", - "type": "Literal[\"warn\", 0, 1]", - "hasDefault": true, - "default": "\"warn\"", - "limitation": null, - "ignored": false, - "docstring": "Sets the value to return when there is a zero division. If set to \"warn\", this acts as 0, but warnings are also raised." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the precision.\n\nThe precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\ntrue positives and ``fp`` the number of false positives. The precision is\nintuitively the ability of the classifier not to label as positive a sample\nthat is negative.\n\nThe best value is 1 and the worst value is 0.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nlabels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n .. 
versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {'micro', 'macro', 'samples', 'weighted', 'binary'} default='binary'\n This parameter is required for multiclass/multilabel targets.\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division. 
If set to\n \"warn\", this acts as 0, but warnings are also raised.\n\nReturns\n-------\nprecision : float (if average is not None) or array of float of shape\n (n_unique_labels,)\n Precision of the positive class in binary classification or weighted\n average of the precision of each class for the multiclass task.\n\nSee Also\n--------\nprecision_recall_fscore_support, multilabel_confusion_matrix\n\nNotes\n-----\nWhen ``true positive + false positive == 0``, precision returns 0 and\nraises ``UndefinedMetricWarning``. This behavior can be\nmodified with ``zero_division``.\n\nExamples\n--------\n>>> from sklearn.metrics import precision_score\n>>> y_true = [0, 1, 2, 0, 1, 2]\n>>> y_pred = [0, 2, 1, 0, 0, 1]\n>>> precision_score(y_true, y_pred, average='macro')\n0.22...\n>>> precision_score(y_true, y_pred, average='micro')\n0.33...\n>>> precision_score(y_true, y_pred, average='weighted')\n0.22...\n>>> precision_score(y_true, y_pred, average=None)\narray([0.66..., 0. , 0. ])\n>>> y_pred = [0, 0, 0, 0, 0, 0]\n>>> precision_score(y_true, y_pred, average=None)\narray([0.33..., 0. , 0. ])\n>>> precision_score(y_true, y_pred, average=None, zero_division=1)\narray([0.33..., 1. , 1. ])" - }, - { - "name": "recall_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. 
Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order. .. versionchanged:: 0.17 Parameter `labels` improved for multiclass problem." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only." - }, - { - "name": "average", - "type": "Literal['micro', 'macro', 'samples', 'weighted', 'binary']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'binary'``: Only report results for the class specified by ``pos_label``. This is applicable only if targets (``y_{true,pred}``) are binary. ``'micro'``: Calculate metrics globally by counting the total true positives, false negatives and false positives. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance; it can result in an F-score that is not between precision and recall. 
``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "zero_division", - "type": "Literal[\"warn\", 0, 1]", - "hasDefault": true, - "default": "\"warn\"", - "limitation": null, - "ignored": false, - "docstring": "Sets the value to return when there is a zero division. If set to \"warn\", this acts as 0, but warnings are also raised." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the recall.\n\nThe recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\ntrue positives and ``fn`` the number of false negatives. The recall is\nintuitively the ability of the classifier to find all the positive samples.\n\nThe best value is 1 and the worst value is 0.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nlabels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n .. 
versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {'micro', 'macro', 'samples', 'weighted', 'binary'} default='binary'\n This parameter is required for multiclass/multilabel targets.\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division. 
If set to\n \"warn\", this acts as 0, but warnings are also raised.\n\nReturns\n-------\nrecall : float (if average is not None) or array of float of shape\n (n_unique_labels,)\n Recall of the positive class in binary classification or weighted\n average of the recall of each class for the multiclass task.\n\nSee Also\n--------\nprecision_recall_fscore_support, balanced_accuracy_score,\nmultilabel_confusion_matrix\n\nNotes\n-----\nWhen ``true positive + false negative == 0``, recall returns 0 and raises\n``UndefinedMetricWarning``. This behavior can be modified with\n``zero_division``.\n\nExamples\n--------\n>>> from sklearn.metrics import recall_score\n>>> y_true = [0, 1, 2, 0, 1, 2]\n>>> y_pred = [0, 2, 1, 0, 0, 1]\n>>> recall_score(y_true, y_pred, average='macro')\n0.33...\n>>> recall_score(y_true, y_pred, average='micro')\n0.33...\n>>> recall_score(y_true, y_pred, average='weighted')\n0.33...\n>>> recall_score(y_true, y_pred, average=None)\narray([1., 0., 0.])\n>>> y_true = [0, 0, 0, 0, 0, 0]\n>>> recall_score(y_true, y_pred, average=None)\narray([0.5, 0. , 0. ])\n>>> recall_score(y_true, y_pred, average=None, zero_division=1)\narray([0.5, 1. , 1. ])" - }, - { - "name": "balanced_accuracy_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- }, - { - "name": "adjusted", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When true, the result is adjusted for chance, so that random performance would score 0, and perfect performance scores 1." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the balanced accuracy.\n\nThe balanced accuracy in binary and multiclass classification problems to\ndeal with imbalanced datasets. It is defined as the average of recall\nobtained on each class.\n\nThe best value is 1 and the worst value is 0 when ``adjusted=False``.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\ny_true : 1d array-like\n Ground truth (correct) target values.\n\ny_pred : 1d array-like\n Estimated targets as returned by a classifier.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nadjusted : bool, default=False\n When true, the result is adjusted for chance, so that random\n performance would score 0, and perfect performance scores 1.\n\nReturns\n-------\nbalanced_accuracy : float\n\nSee Also\n--------\nrecall_score, roc_auc_score\n\nNotes\n-----\nSome literature promotes alternative definitions of balanced accuracy. Our\ndefinition is equivalent to :func:`accuracy_score` with class-balanced\nsample weights, and shares desirable properties with the binary case.\nSee the :ref:`User Guide `.\n\nReferences\n----------\n.. [1] Brodersen, K.H.; Ong, C.S.; Stephan, K.E.; Buhmann, J.M. (2010).\n The balanced accuracy and its posterior distribution.\n Proceedings of the 20th International Conference on Pattern\n Recognition, 3121-24.\n.. [2] John. D. 
Kelleher, Brian Mac Namee, Aoife D'Arcy, (2015).\n `Fundamentals of Machine Learning for Predictive Data Analytics:\n Algorithms, Worked Examples, and Case Studies\n `_.\n\nExamples\n--------\n>>> from sklearn.metrics import balanced_accuracy_score\n>>> y_true = [0, 1, 0, 0, 1, 0]\n>>> y_pred = [0, 1, 0, 0, 0, 1]\n>>> balanced_accuracy_score(y_true, y_pred)\n0.625" - }, - { - "name": "classification_report", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Optional list of label indices to include in the report." - }, - { - "name": "target_names", - "type": "List[str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Optional display names matching the labels (same order)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "digits", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of digits for formatting output floating point values. When ``output_dict`` is ``True``, this will be ignored and the returned values will not be rounded." - }, - { - "name": "output_dict", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, return output as dict. .. 
versionadded:: 0.20" - }, - { - "name": "zero_division", - "type": "Literal[\"warn\", 0, 1]", - "hasDefault": true, - "default": "\"warn\"", - "limitation": null, - "ignored": false, - "docstring": "Sets the value to return when there is a zero division. If set to \"warn\", this acts as 0, but warnings are also raised." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a text report showing the main classification metrics.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nlabels : array-like of shape (n_labels,), default=None\n Optional list of label indices to include in the report.\n\ntarget_names : list of str of shape (n_labels,), default=None\n Optional display names matching the labels (same order).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\ndigits : int, default=2\n Number of digits for formatting output floating point values.\n When ``output_dict`` is ``True``, this will be ignored and the\n returned values will not be rounded.\n\noutput_dict : bool, default=False\n If True, return output as dict.\n\n .. versionadded:: 0.20\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division. If set to\n \"warn\", this acts as 0, but warnings are also raised.\n\nReturns\n-------\nreport : string / dict\n Text summary of the precision, recall, F1 score for each class.\n Dictionary returned if output_dict is True. Dictionary has the\n following structure::\n\n {'label 1': {'precision':0.5,\n 'recall':1.0,\n 'f1-score':0.67,\n 'support':1},\n 'label 2': { ... 
},\n ...\n }\n\n The reported averages include macro average (averaging the unweighted\n mean per label), weighted average (averaging the support-weighted mean\n per label), and sample average (only for multilabel classification).\n Micro average (averaging the total true positives, false negatives and\n false positives) is only shown for multi-label or multi-class\n with a subset of classes, because it corresponds to accuracy\n otherwise and would be the same for all metrics.\n See also :func:`precision_recall_fscore_support` for more details\n on averages.\n\n Note that in binary classification, recall of the positive class\n is also known as \"sensitivity\"; recall of the negative class is\n \"specificity\".\n\nSee Also\n--------\nprecision_recall_fscore_support, confusion_matrix,\nmultilabel_confusion_matrix\n\nExamples\n--------\n>>> from sklearn.metrics import classification_report\n>>> y_true = [0, 1, 2, 2, 2]\n>>> y_pred = [0, 0, 2, 2, 1]\n>>> target_names = ['class 0', 'class 1', 'class 2']\n>>> print(classification_report(y_true, y_pred, target_names=target_names))\n precision recall f1-score support\n\n class 0 0.50 1.00 0.67 1\n class 1 0.00 0.00 0.00 1\n class 2 1.00 0.67 0.80 3\n\n accuracy 0.60 5\n macro avg 0.50 0.56 0.49 5\nweighted avg 0.70 0.60 0.61 5\n\n>>> y_pred = [1, 1, 0]\n>>> y_true = [1, 1, 1]\n>>> print(classification_report(y_true, y_pred, labels=[1, 2, 3]))\n precision recall f1-score support\n\n 1 1.00 0.67 0.80 3\n 2 0.00 0.00 0.00 0\n 3 0.00 0.00 0.00 0\n\n micro avg 1.00 0.67 0.80 3\n macro avg 0.33 0.22 0.27 3\nweighted avg 1.00 0.67 0.80 3\n" - }, - { - "name": "hamming_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) labels." 
- }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted labels, as returned by a classifier." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. .. versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the average Hamming loss.\n\nThe Hamming loss is the fraction of labels that are incorrectly predicted.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) labels.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Predicted labels, as returned by a classifier.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nloss : float or int\n Return the average Hamming loss between element of ``y_true`` and\n ``y_pred``.\n\nSee Also\n--------\naccuracy_score, jaccard_score, zero_one_loss\n\nNotes\n-----\nIn multiclass classification, the Hamming loss corresponds to the Hamming\ndistance between ``y_true`` and ``y_pred`` which is equivalent to the\nsubset ``zero_one_loss`` function, when `normalize` parameter is set to\nTrue.\n\nIn multilabel classification, the Hamming loss is different from the\nsubset zero-one loss. The zero-one loss considers the entire set of labels\nfor a given sample incorrect if it does not entirely match the true set of\nlabels. Hamming loss is more forgiving in that it penalizes only the\nindividual labels.\n\nThe Hamming loss is upperbounded by the subset zero-one loss, when\n`normalize` parameter is set to True. It is always between 0 and 1,\nlower being better.\n\nReferences\n----------\n.. [1] Grigorios Tsoumakas, Ioannis Katakis. Multi-Label Classification:\n An Overview. 
International Journal of Data Warehousing & Mining,\n 3(3), 1-13, July-September 2007.\n\n.. [2] `Wikipedia entry on the Hamming distance\n `_.\n\nExamples\n--------\n>>> from sklearn.metrics import hamming_loss\n>>> y_pred = [1, 2, 3, 4]\n>>> y_true = [2, 2, 3, 4]\n>>> hamming_loss(y_true, y_pred)\n0.25\n\nIn the multilabel case with binary label indicators:\n\n>>> import numpy as np\n>>> hamming_loss(np.array([[0, 1], [1, 1]]), np.zeros((2, 2)))\n0.75" - }, - { - "name": "log_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) labels for n_samples samples." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted probabilities, as returned by a classifier's predict_proba method. If ``y_pred.shape = (n_samples,)`` the probabilities provided are assumed to be that of the positive class. The labels in ``y_pred`` are assumed to be ordered alphabetically, as done by :class:`preprocessing.LabelBinarizer`." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-15", - "limitation": null, - "ignored": false, - "docstring": "Log loss is undefined for p=0 or p=1, so probabilities are clipped to max(eps, min(1 - eps, p))." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If true, return the mean loss per sample. Otherwise, return the sum of the per-sample losses." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not provided, labels will be inferred from y_true. If ``labels`` is ``None`` and ``y_pred`` has shape (n_samples,) the labels are assumed to be binary and are inferred from ``y_true``. .. versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Log loss, aka logistic loss or cross-entropy loss.\n\nThis is the loss function used in (multinomial) logistic regression\nand extensions of it such as neural networks, defined as the negative\nlog-likelihood of a logistic model that returns ``y_pred`` probabilities\nfor its training data ``y_true``.\nThe log loss is only defined for two or more labels.\nFor a single sample with true label :math:`y \\in \\{0,1\\}` and\nand a probability estimate :math:`p = \\operatorname{Pr}(y = 1)`, the log\nloss is:\n\n.. math::\n L_{\\log}(y, p) = -(y \\log (p) + (1 - y) \\log (1 - p))\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like or label indicator matrix\n Ground truth (correct) labels for n_samples samples.\n\ny_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)\n Predicted probabilities, as returned by a classifier's\n predict_proba method. If ``y_pred.shape = (n_samples,)``\n the probabilities provided are assumed to be that of the\n positive class. 
The labels in ``y_pred`` are assumed to be\n ordered alphabetically, as done by\n :class:`preprocessing.LabelBinarizer`.\n\neps : float, default=1e-15\n Log loss is undefined for p=0 or p=1, so probabilities are\n clipped to max(eps, min(1 - eps, p)).\n\nnormalize : bool, default=True\n If true, return the mean loss per sample.\n Otherwise, return the sum of the per-sample losses.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nlabels : array-like, default=None\n If not provided, labels will be inferred from y_true. If ``labels``\n is ``None`` and ``y_pred`` has shape (n_samples,) the labels are\n assumed to be binary and are inferred from ``y_true``.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nloss : float\n\nNotes\n-----\nThe logarithm used is the natural logarithm (base-e).\n\nExamples\n--------\n>>> from sklearn.metrics import log_loss\n>>> log_loss([\"spam\", \"ham\", \"ham\", \"spam\"],\n... [[.1, .9], [.9, .1], [.8, .2], [.35, .65]])\n0.21616...\n\nReferences\n----------\nC.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,\np. 209." - }, - { - "name": "hinge_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True target, consisting of integers of two values. The positive label must be greater than the negative label." - }, - { - "name": "pred_decision", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted decisions, as output by decision_function (floats)." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Contains all the labels for the problem. Used in multiclass hinge loss." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Average hinge loss (non-regularized).\n\nIn binary class case, assuming labels in y_true are encoded with +1 and -1,\nwhen a prediction mistake is made, ``margin = y_true * pred_decision`` is\nalways negative (since the signs disagree), implying ``1 - margin`` is\nalways greater than 1. The cumulated hinge loss is therefore an upper\nbound of the number of mistakes made by the classifier.\n\nIn multiclass case, the function expects that either all the labels are\nincluded in y_true or an optional labels argument is provided which\ncontains all the labels. The multilabel margin is calculated according\nto Crammer-Singer's method. As in the binary case, the cumulated hinge loss\nis an upper bound of the number of mistakes made by the classifier.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array of shape (n_samples,)\n True target, consisting of integers of two values. The positive label\n must be greater than the negative label.\n\npred_decision : array of shape (n_samples,) or (n_samples, n_classes)\n Predicted decisions, as output by decision_function (floats).\n\nlabels : array-like, default=None\n Contains all the labels for the problem. Used in multiclass hinge loss.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nloss : float\n\nReferences\n----------\n.. [1] `Wikipedia entry on the Hinge loss\n `_.\n\n.. [2] Koby Crammer, Yoram Singer. On the Algorithmic\n Implementation of Multiclass Kernel-based Vector\n Machines. Journal of Machine Learning Research 2,\n (2001), 265-292.\n\n.. [3] `L1 AND L2 Regularization for Multiclass Hinge Loss Models\n by Robert C. 
Moore, John DeNero\n `_.\n\nExamples\n--------\n>>> from sklearn import svm\n>>> from sklearn.metrics import hinge_loss\n>>> X = [[0], [1]]\n>>> y = [-1, 1]\n>>> est = svm.LinearSVC(random_state=0)\n>>> est.fit(X, y)\nLinearSVC(random_state=0)\n>>> pred_decision = est.decision_function([[-2], [3], [0.5]])\n>>> pred_decision\narray([-2.18..., 2.36..., 0.09...])\n>>> hinge_loss([-1, 1, 1], pred_decision)\n0.30...\n\nIn the multiclass case:\n\n>>> import numpy as np\n>>> X = np.array([[0], [1], [2], [3]])\n>>> Y = np.array([0, 1, 2, 3])\n>>> labels = np.array([0, 1, 2, 3])\n>>> est = svm.LinearSVC()\n>>> est.fit(X, Y)\nLinearSVC()\n>>> pred_decision = est.decision_function([[-1], [2], [3]])\n>>> y_true = [0, 2, 3]\n>>> hinge_loss(y_true, pred_decision, labels=labels)\n0.56..." - }, - { - "name": "brier_score_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True targets." - }, - { - "name": "y_prob", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Probabilities of the positive class." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "pos_label", - "type": "Union[str, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Label of the positive class. `pos_label` will be infered in the following manner: * if `y_true` in {-1, 1} or {0, 1}, `pos_label` defaults to 1; * else if `y_true` contains string, an error will be raised and `pos_label` should be explicitely specified; * otherwise, `pos_label` defaults to the greater label, i.e. `np.unique(y_true)[-1]`." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Brier score loss.\n\nThe smaller the Brier score loss, the better, hence the naming with \"loss\".\nThe Brier score measures the mean squared difference between the predicted\nprobability and the actual outcome. The Brier score always\ntakes on a value between zero and one, since this is the largest\npossible difference between a predicted probability (which must be\nbetween zero and one) and the actual outcome (which can take on values\nof only 0 and 1). It can be decomposed is the sum of refinement loss and\ncalibration loss.\n\nThe Brier score is appropriate for binary and categorical outcomes that\ncan be structured as true or false, but is inappropriate for ordinal\nvariables which can take on three or more values (this is because the\nBrier score assumes that all possible outcomes are equivalently\n\"distant\" from one another). Which label is considered to be the positive\nlabel is controlled via the parameter `pos_label`, which defaults to\nthe greater label unless `y_true` is all 0 or all -1, in which case\n`pos_label` defaults to 1.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array of shape (n_samples,)\n True targets.\n\ny_prob : array of shape (n_samples,)\n Probabilities of the positive class.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\npos_label : int or str, default=None\n Label of the positive class. `pos_label` will be infered in the\n following manner:\n\n * if `y_true` in {-1, 1} or {0, 1}, `pos_label` defaults to 1;\n * else if `y_true` contains string, an error will be raised and\n `pos_label` should be explicitely specified;\n * otherwise, `pos_label` defaults to the greater label,\n i.e. 
`np.unique(y_true)[-1]`.\n\nReturns\n-------\nscore : float\n Brier score loss.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import brier_score_loss\n>>> y_true = np.array([0, 1, 1, 0])\n>>> y_true_categorical = np.array([\"spam\", \"ham\", \"ham\", \"spam\"])\n>>> y_prob = np.array([0.1, 0.9, 0.8, 0.3])\n>>> brier_score_loss(y_true, y_prob)\n0.037...\n>>> brier_score_loss(y_true, 1-y_prob, pos_label=0)\n0.037...\n>>> brier_score_loss(y_true_categorical, y_prob, pos_label=\"ham\")\n0.037...\n>>> brier_score_loss(y_true, np.array(y_prob) > 0.5)\n0.0\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Brier score\n `_." - } - ] - }, - { - "name": "sklearn.metrics._ranking", - "imports": [ - "import warnings", - "from functools import partial", - "import numpy as np", - "from scipy.sparse import csr_matrix", - "from scipy.stats import rankdata", - "from utils import assert_all_finite", - "from utils import check_consistent_length", - "from utils import column_or_1d", - "from utils import check_array", - "from utils.multiclass import type_of_target", - "from utils.extmath import stable_cumsum", - "from utils.sparsefuncs import count_nonzero", - "from utils.validation import _deprecate_positional_args", - "from exceptions import UndefinedMetricWarning", - "from preprocessing import label_binarize", - "from utils._encode import _encode", - "from utils._encode import _unique", - "from _base import _average_binary_score", - "from _base import _average_multiclass_ovo_score", - "from _base import _check_pos_label_consistency" - ], - "classes": [], - "functions": [ - { - "name": "auc", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "x coordinates. These must be either monotonic increasing or monotonic decreasing." 
- }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "y coordinates." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Area Under the Curve (AUC) using the trapezoidal rule.\n\nThis is a general function, given points on a curve. For computing the\narea under the ROC-curve, see :func:`roc_auc_score`. For an alternative\nway to summarize a precision-recall curve, see\n:func:`average_precision_score`.\n\nParameters\n----------\nx : ndarray of shape (n,)\n x coordinates. These must be either monotonic increasing or monotonic\n decreasing.\ny : ndarray of shape, (n,)\n y coordinates.\n\nReturns\n-------\nauc : float\n\nSee Also\n--------\nroc_auc_score : Compute the area under the ROC curve.\naverage_precision_score : Compute average precision from prediction scores.\nprecision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import metrics\n>>> y = np.array([1, 1, 2, 2])\n>>> pred = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, tpr, thresholds = metrics.roc_curve(y, pred, pos_label=2)\n>>> metrics.auc(fpr, tpr)\n0.75" - }, - { - "name": "average_precision_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True binary labels or binary label indicators." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates of the positive class, confidence values, or non-thresholded measure of decisions (as returned by :term:`decision_function` on some classifiers)." 
- }, - { - "name": "average", - "type": "Literal['micro', 'samples', 'weighted', 'macro']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'micro'``: Calculate metrics globally by considering each element of the label indicator matrix as a label. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). ``'samples'``: Calculate metrics for each instance, and find their average. Will be ignored when ``y_true`` is binary." - }, - { - "name": "pos_label", - "type": "Union[str, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The label of the positive class. Only applied to binary ``y_true``. For multilabel-indicator ``y_true``, ``pos_label`` is fixed to 1." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute average precision (AP) from prediction scores.\n\nAP summarizes a precision-recall curve as the weighted mean of precisions\nachieved at each threshold, with the increase in recall from the previous\nthreshold used as the weight:\n\n.. math::\n \\text{AP} = \\sum_n (R_n - R_{n-1}) P_n\n\nwhere :math:`P_n` and :math:`R_n` are the precision and recall at the nth\nthreshold [1]_. 
This implementation is not interpolated and is different\nfrom computing the area under the precision-recall curve with the\ntrapezoidal rule, which uses linear interpolation and can be too\noptimistic.\n\nNote: this implementation is restricted to the binary classification task\nor multilabel classification task.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,) or (n_samples, n_classes)\n True binary labels or binary label indicators.\n\ny_score : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by :term:`decision_function` on some classifiers).\n\naverage : {'micro', 'samples', 'weighted', 'macro'} or None, default='macro'\n If ``None``, the scores for each class are returned. Otherwise,\n this determines the type of averaging performed on the data:\n\n ``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n ``'samples'``:\n Calculate metrics for each instance, and find their average.\n\n Will be ignored when ``y_true`` is binary.\n\npos_label : int or str, default=1\n The label of the positive class. 
Only applied to binary ``y_true``.\n For multilabel-indicator ``y_true``, ``pos_label`` is fixed to 1.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\naverage_precision : float\n\nSee Also\n--------\nroc_auc_score : Compute the area under the ROC curve.\nprecision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\n\nNotes\n-----\n.. versionchanged:: 0.19\n Instead of linearly interpolating between operating points, precisions\n are weighted by the change in recall since the last operating point.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Average precision\n `_\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import average_precision_score\n>>> y_true = np.array([0, 0, 1, 1])\n>>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])\n>>> average_precision_score(y_true, y_scores)\n0.83..." - }, - { - "name": "det_curve", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True binary labels. If labels are not either {-1, 1} or {0, 1}, then pos_label should be explicitly given." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates of the positive class, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." - }, - { - "name": "pos_label", - "type": "Union[str, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The label of the positive class. When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1}, ``pos_label`` is set to 1, otherwise an error will be raised." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute error rates for different probability thresholds.\n\n.. note::\n This metric is used for evaluation of ranking and error tradeoffs of\n a binary classification task.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,)\n True binary labels. If labels are not either {-1, 1} or {0, 1}, then\n pos_label should be explicitly given.\n\ny_score : ndarray of shape of (n_samples,)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\npos_label : int or str, default=None\n The label of the positive class.\n When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1},\n ``pos_label`` is set to 1, otherwise an error will be raised.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nfpr : ndarray of shape (n_thresholds,)\n False positive rate (FPR) such that element i is the false positive\n rate of predictions with score >= thresholds[i]. This is occasionally\n referred to as false acceptance propability or fall-out.\n\nfnr : ndarray of shape (n_thresholds,)\n False negative rate (FNR) such that element i is the false negative\n rate of predictions with score >= thresholds[i]. 
This is occasionally\n referred to as false rejection or miss rate.\n\nthresholds : ndarray of shape (n_thresholds,)\n Decreasing score values.\n\nSee Also\n--------\nplot_det_curve : Plot detection error tradeoff (DET) curve.\nDetCurveDisplay : DET curve visualization.\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nprecision_recall_curve : Compute precision-recall curve.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import det_curve\n>>> y_true = np.array([0, 0, 1, 1])\n>>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, fnr, thresholds = det_curve(y_true, y_scores)\n>>> fpr\narray([0.5, 0.5, 0. ])\n>>> fnr\narray([0. , 0.5, 0.5])\n>>> thresholds\narray([0.35, 0.4 , 0.8 ])" - }, - { - "name": "_binary_roc_auc_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Binary roc auc score." - }, - { - "name": "roc_auc_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels or binary label indicators. The binary and multiclass cases expect labels with shape (n_samples,) while the multilabel case expects binary label indicators with shape (n_samples, n_classes)." - }, - { - "name": "y_score", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores. * In the binary case, it corresponds to an array of shape `(n_samples,)`. Both probability estimates and non-thresholded decision values can be provided. The probability estimates correspond to the **probability of the class with the greater label**, i.e. `estimator.classes_[1]` and thus `estimator.predict_proba(X, y)[:, 1]`. The decision values corresponds to the output of `estimator.decision_function(X, y)`. 
See more information in the :ref:`User guide `; * In the multiclass case, it corresponds to an array of shape `(n_samples, n_classes)` of probability estimates provided by the `predict_proba` method. The probability estimates **must** sum to 1 across the possible classes. In addition, the order of the class scores must correspond to the order of ``labels``, if provided, or else to the numerical or lexicographical order of the labels in ``y_true``. See more information in the :ref:`User guide `; * In the multilabel case, it corresponds to an array of shape `(n_samples, n_classes)`. Probability estimates are provided by the `predict_proba` method and the non-thresholded decision values by the `decision_function` method. The probability estimates correspond to the **probability of the class with the greater label for each output** of the classifier. See more information in the :ref:`User guide `." - }, - { - "name": "average", - "type": "Literal['micro', 'macro', 'samples', 'weighted']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: Note: multiclass ROC AUC currently only handles the 'macro' and 'weighted' averages. ``'micro'``: Calculate metrics globally by considering each element of the label indicator matrix as a label. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). ``'samples'``: Calculate metrics for each instance, and find their average. Will be ignored when ``y_true`` is binary." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- }, - { - "name": "max_fpr", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not ``None``, the standardized partial AUC [2]_ over the range [0, max_fpr] is returned. For the multiclass case, ``max_fpr``, should be either equal to ``None`` or ``1.0`` as AUC ROC partial computation currently is not supported for multiclass." - }, - { - "name": "multi_class", - "type": "Literal['raise', 'ovr', 'ovo']", - "hasDefault": true, - "default": "'raise'", - "limitation": null, - "ignored": false, - "docstring": "Only used for multiclass targets. Determines the type of configuration to use. The default value raises an error, so either ``'ovr'`` or ``'ovo'`` must be passed explicitly. ``'ovr'``: Stands for One-vs-rest. Computes the AUC of each class against the rest [3]_ [4]_. This treats the multiclass case in the same way as the multilabel case. Sensitive to class imbalance even when ``average == 'macro'``, because class imbalance affects the composition of each of the 'rest' groupings. ``'ovo'``: Stands for One-vs-one. Computes the average AUC of all possible pairwise combinations of classes [5]_. Insensitive to class imbalance when ``average == 'macro'``." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Only used for multiclass targets. List of labels that index the classes in ``y_score``. If ``None``, the numerical or lexicographical order of the labels in ``y_true`` is used." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)\nfrom prediction scores.\n\nNote: this implementation can be used with binary, multiclass and\nmultilabel classification, but some restrictions apply (see Parameters).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_classes)\n True labels or binary label indicators. The binary and multiclass cases\n expect labels with shape (n_samples,) while the multilabel case expects\n binary label indicators with shape (n_samples, n_classes).\n\ny_score : array-like of shape (n_samples,) or (n_samples, n_classes)\n Target scores.\n\n * In the binary case, it corresponds to an array of shape\n `(n_samples,)`. Both probability estimates and non-thresholded\n decision values can be provided. The probability estimates correspond\n to the **probability of the class with the greater label**,\n i.e. `estimator.classes_[1]` and thus\n `estimator.predict_proba(X, y)[:, 1]`. The decision values\n corresponds to the output of `estimator.decision_function(X, y)`.\n See more information in the :ref:`User guide `;\n * In the multiclass case, it corresponds to an array of shape\n `(n_samples, n_classes)` of probability estimates provided by the\n `predict_proba` method. The probability estimates **must**\n sum to 1 across the possible classes. In addition, the order of the\n class scores must correspond to the order of ``labels``,\n if provided, or else to the numerical or lexicographical order of\n the labels in ``y_true``. See more information in the\n :ref:`User guide `;\n * In the multilabel case, it corresponds to an array of shape\n `(n_samples, n_classes)`. Probability estimates are provided by the\n `predict_proba` method and the non-thresholded decision values by\n the `decision_function` method. 
The probability estimates correspond\n to the **probability of the class with the greater label for each\n output** of the classifier. See more information in the\n :ref:`User guide `.\n\naverage : {'micro', 'macro', 'samples', 'weighted'} or None, default='macro'\n If ``None``, the scores for each class are returned. Otherwise,\n this determines the type of averaging performed on the data:\n Note: multiclass ROC AUC currently only handles the 'macro' and\n 'weighted' averages.\n\n ``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n ``'samples'``:\n Calculate metrics for each instance, and find their average.\n\n Will be ignored when ``y_true`` is binary.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmax_fpr : float > 0 and <= 1, default=None\n If not ``None``, the standardized partial AUC [2]_ over the range\n [0, max_fpr] is returned. For the multiclass case, ``max_fpr``,\n should be either equal to ``None`` or ``1.0`` as AUC ROC partial\n computation currently is not supported for multiclass.\n\nmulti_class : {'raise', 'ovr', 'ovo'}, default='raise'\n Only used for multiclass targets. Determines the type of configuration\n to use. The default value raises an error, so either\n ``'ovr'`` or ``'ovo'`` must be passed explicitly.\n\n ``'ovr'``:\n Stands for One-vs-rest. Computes the AUC of each class\n against the rest [3]_ [4]_. This\n treats the multiclass case in the same way as the multilabel case.\n Sensitive to class imbalance even when ``average == 'macro'``,\n because class imbalance affects the composition of each of the\n 'rest' groupings.\n ``'ovo'``:\n Stands for One-vs-one. 
Computes the average AUC of all\n possible pairwise combinations of classes [5]_.\n Insensitive to class imbalance when\n ``average == 'macro'``.\n\nlabels : array-like of shape (n_classes,), default=None\n Only used for multiclass targets. List of labels that index the\n classes in ``y_score``. If ``None``, the numerical or lexicographical\n order of the labels in ``y_true`` is used.\n\nReturns\n-------\nauc : float\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Receiver operating characteristic\n `_\n\n.. [2] `Analyzing a portion of the ROC curve. McClish, 1989\n `_\n\n.. [3] Provost, F., Domingos, P. (2000). Well-trained PETs: Improving\n probability estimation trees (Section 6.2), CeDER Working Paper\n #IS-00-04, Stern School of Business, New York University.\n\n.. [4] `Fawcett, T. (2006). An introduction to ROC analysis. Pattern\n Recognition Letters, 27(8), 861-874.\n `_\n\n.. [5] `Hand, D.J., Till, R.J. (2001). A Simple Generalisation of the Area\n Under the ROC Curve for Multiple Class Classification Problems.\n Machine Learning, 45(2), 171-186.\n `_\n\nSee Also\n--------\naverage_precision_score : Area under the precision-recall curve.\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nplot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n\nExamples\n--------\nBinary case:\n\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.metrics import roc_auc_score\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> clf = LogisticRegression(solver=\"liblinear\", random_state=0).fit(X, y)\n>>> roc_auc_score(y, clf.predict_proba(X)[:, 1])\n0.99...\n>>> roc_auc_score(y, clf.decision_function(X))\n0.99...\n\nMulticlass case:\n\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = LogisticRegression(solver=\"liblinear\").fit(X, y)\n>>> roc_auc_score(y, clf.predict_proba(X), 
multi_class='ovr')\n0.99...\n\nMultilabel case:\n\n>>> from sklearn.datasets import make_multilabel_classification\n>>> from sklearn.multioutput import MultiOutputClassifier\n>>> X, y = make_multilabel_classification(random_state=0)\n>>> clf = MultiOutputClassifier(clf).fit(X, y)\n>>> # get a list of n_output containing probability arrays of shape\n>>> # (n_samples, n_classes)\n>>> y_pred = clf.predict_proba(X)\n>>> # extract the positive columns for each output\n>>> y_pred = np.transpose([pred[:, 1] for pred in y_pred])\n>>> roc_auc_score(y, y_pred, average=None)\narray([0.82..., 0.86..., 0.94..., 0.85... , 0.94...])\n>>> from sklearn.linear_model import RidgeClassifierCV\n>>> clf = RidgeClassifierCV().fit(X, y)\n>>> roc_auc_score(y, clf.decision_function(X), average=None)\narray([0.81..., 0.84... , 0.93..., 0.87..., 0.94...])" - }, - { - "name": "_multiclass_roc_auc_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True multiclass labels." - }, - { - "name": "y_score", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores corresponding to probability estimates of a sample belonging to a particular class" - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of labels to index ``y_score`` used for multiclass. If ``None``, the lexical order of ``y_true`` is used to index ``y_score``." - }, - { - "name": "multi_class", - "type": "Literal['ovr', 'ovo']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the type of multiclass configuration to use. ``'ovr'``: Calculate metrics for the multiclass case using the one-vs-rest approach. 
``'ovo'``: Calculate metrics for the multiclass case using the one-vs-one approach." - }, - { - "name": "average", - "type": "Literal['macro', 'weighted']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the type of averaging performed on the pairwise binary metric scores ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. Classes are assumed to be uniformly distributed. ``'weighted'``: Calculate metrics for each label, taking into account the prevalence of the classes." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Multiclass roc auc score.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n True multiclass labels.\n\ny_score : array-like of shape (n_samples, n_classes)\n Target scores corresponding to probability estimates of a sample\n belonging to a particular class\n\nlabels : array-like of shape (n_classes,) or None\n List of labels to index ``y_score`` used for multiclass. If ``None``,\n the lexical order of ``y_true`` is used to index ``y_score``.\n\nmulti_class : {'ovr', 'ovo'}\n Determines the type of multiclass configuration to use.\n ``'ovr'``:\n Calculate metrics for the multiclass case using the one-vs-rest\n approach.\n ``'ovo'``:\n Calculate metrics for the multiclass case using the one-vs-one\n approach.\n\naverage : {'macro', 'weighted'}\n Determines the type of averaging performed on the pairwise binary\n metric scores\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account. 
Classes\n are assumed to be uniformly distributed.\n ``'weighted'``:\n Calculate metrics for each label, taking into account the\n prevalence of the classes.\n\nsample_weight : array-like of shape (n_samples,) or None\n Sample weights." - }, - { - "name": "_binary_clf_curve", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True targets of binary classification." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated probabilities or output of a decision function." - }, - { - "name": "pos_label", - "type": "Union[str, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The label of the positive class." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate true and false positives per binary classification threshold.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,)\n True targets of binary classification.\n\ny_score : ndarray of shape (n_samples,)\n Estimated probabilities or output of a decision function.\n\npos_label : int or str, default=None\n The label of the positive class.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nfps : ndarray of shape (n_thresholds,)\n A count of false positives, at index i being the number of negative\n samples assigned a score >= thresholds[i]. 
The total number of\n negative samples is equal to fps[-1] (thus true negatives are given by\n fps[-1] - fps).\n\ntps : ndarray of shape (n_thresholds,)\n An increasing count of true positives, at index i being the number\n of positive samples assigned a score >= thresholds[i]. The total\n number of positive samples is equal to tps[-1] (thus false negatives\n are given by tps[-1] - tps).\n\nthresholds : ndarray of shape (n_thresholds,)\n Decreasing score values." - }, - { - "name": "precision_recall_curve", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True binary labels. If labels are not either {-1, 1} or {0, 1}, then pos_label should be explicitly given." - }, - { - "name": "probas_pred", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated probabilities or output of a decision function." - }, - { - "name": "pos_label", - "type": "Union[str, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The label of the positive class. When ``pos_label=None``, if y_true is in {-1, 1} or {0, 1}, ``pos_label`` is set to 1, otherwise an error will be raised." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute precision-recall pairs for different probability thresholds.\n\nNote: this implementation is restricted to the binary classification task.\n\nThe precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\ntrue positives and ``fp`` the number of false positives. 
The precision is\nintuitively the ability of the classifier not to label as positive a sample\nthat is negative.\n\nThe recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\ntrue positives and ``fn`` the number of false negatives. The recall is\nintuitively the ability of the classifier to find all the positive samples.\n\nThe last precision and recall values are 1. and 0. respectively and do not\nhave a corresponding threshold. This ensures that the graph starts on the\ny axis.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,)\n True binary labels. If labels are not either {-1, 1} or {0, 1}, then\n pos_label should be explicitly given.\n\nprobas_pred : ndarray of shape (n_samples,)\n Estimated probabilities or output of a decision function.\n\npos_label : int or str, default=None\n The label of the positive class.\n When ``pos_label=None``, if y_true is in {-1, 1} or {0, 1},\n ``pos_label`` is set to 1, otherwise an error will be raised.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nprecision : ndarray of shape (n_thresholds + 1,)\n Precision values such that element i is the precision of\n predictions with score >= thresholds[i] and the last element is 1.\n\nrecall : ndarray of shape (n_thresholds + 1,)\n Decreasing recall values such that element i is the recall of\n predictions with score >= thresholds[i] and the last element is 0.\n\nthresholds : ndarray of shape (n_thresholds,)\n Increasing thresholds on the decision function used to compute\n precision and recall. 
n_thresholds <= len(np.unique(probas_pred)).\n\nSee Also\n--------\nplot_precision_recall_curve : Plot Precision Recall Curve for binary\n classifiers.\nPrecisionRecallDisplay : Precision Recall visualization.\naverage_precision_score : Compute average precision from prediction scores.\ndet_curve: Compute error rates for different probability thresholds.\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import precision_recall_curve\n>>> y_true = np.array([0, 0, 1, 1])\n>>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])\n>>> precision, recall, thresholds = precision_recall_curve(\n... y_true, y_scores)\n>>> precision\narray([0.66666667, 0.5 , 1. , 1. ])\n>>> recall\narray([1. , 0.5, 0.5, 0. ])\n>>> thresholds\narray([0.35, 0.4 , 0.8 ])" - }, - { - "name": "roc_curve", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True binary labels. If labels are not either {-1, 1} or {0, 1}, then pos_label should be explicitly given." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates of the positive class, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." - }, - { - "name": "pos_label", - "type": "Union[str, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The label of the positive class. When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1}, ``pos_label`` is set to 1, otherwise an error will be raised." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "drop_intermediate", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to drop some suboptimal thresholds which would not appear on a plotted ROC curve. This is useful in order to create lighter ROC curves. .. versionadded:: 0.17 parameter *drop_intermediate*." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Receiver operating characteristic (ROC).\n\nNote: this implementation is restricted to the binary classification task.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,)\n True binary labels. If labels are not either {-1, 1} or {0, 1}, then\n pos_label should be explicitly given.\n\ny_score : ndarray of shape (n_samples,)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\npos_label : int or str, default=None\n The label of the positive class.\n When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1},\n ``pos_label`` is set to 1, otherwise an error will be raised.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\ndrop_intermediate : bool, default=True\n Whether to drop some suboptimal thresholds which would not appear\n on a plotted ROC curve. This is useful in order to create lighter\n ROC curves.\n\n .. 
versionadded:: 0.17\n parameter *drop_intermediate*.\n\nReturns\n-------\nfpr : ndarray of shape (>2,)\n Increasing false positive rates such that element i is the false\n positive rate of predictions with score >= `thresholds[i]`.\n\ntpr : ndarray of shape (>2,)\n Increasing true positive rates such that element `i` is the true\n positive rate of predictions with score >= `thresholds[i]`.\n\nthresholds : ndarray of shape = (n_thresholds,)\n Decreasing thresholds on the decision function used to compute\n fpr and tpr. `thresholds[0]` represents no instances being predicted\n and is arbitrarily set to `max(y_score) + 1`.\n\nSee Also\n--------\nplot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\nRocCurveDisplay : ROC Curve visualization.\ndet_curve: Compute error rates for different probability thresholds.\nroc_auc_score : Compute the area under the ROC curve.\n\nNotes\n-----\nSince the thresholds are sorted from low to high values, they\nare reversed upon returning them to ensure they correspond to both ``fpr``\nand ``tpr``, which are sorted in reversed order during their calculation.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Receiver operating characteristic\n `_\n\n.. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition\n Letters, 2006, 27(8):861-874.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import metrics\n>>> y = np.array([1, 1, 2, 2])\n>>> scores = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, tpr, thresholds = metrics.roc_curve(y, scores, pos_label=2)\n>>> fpr\narray([0. , 0. , 0.5, 0.5, 1. ])\n>>> tpr\narray([0. , 0.5, 0.5, 1. , 1. ])\n>>> thresholds\narray([1.8 , 0.8 , 0.4 , 0.35, 0.1 ])" - }, - { - "name": "label_ranking_average_precision_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True binary labels in binary indicator format." 
- }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates of the positive class, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute ranking-based average precision.\n\nLabel ranking average precision (LRAP) is the average over each ground\ntruth label assigned to each sample, of the ratio of true vs. total\nlabels with lower score.\n\nThis metric is used in multilabel ranking problem, where the goal\nis to give better rank to the labels associated to each sample.\n\nThe obtained score is always strictly greater than 0 and\nthe best value is 1.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : {ndarray, sparse matrix} of shape (n_samples, n_labels)\n True binary labels in binary indicator format.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. versionadded:: 0.20\n\nReturns\n-------\nscore : float\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import label_ranking_average_precision_score\n>>> y_true = np.array([[1, 0, 0], [0, 0, 1]])\n>>> y_score = np.array([[0.75, 0.5, 1], [1, 0.2, 0.1]])\n>>> label_ranking_average_precision_score(y_true, y_score)\n0.416..." 
- }, - { - "name": "coverage_error", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True binary labels in binary indicator format." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates of the positive class, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Coverage error measure.\n\nCompute how far we need to go through the ranked scores to cover all\ntrue labels. The best value is equal to the average number\nof labels in ``y_true`` per sample.\n\nTies in ``y_scores`` are broken by giving maximal rank that would have\nbeen assigned to all tied values.\n\nNote: Our implementation's score is 1 greater than the one given in\nTsoumakas et al., 2010. This extends it to handle the degenerate case\nin which an instance has 0 true labels.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n True binary labels in binary indicator format.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\ncoverage_error : float\n\nReferences\n----------\n.. [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. 
(2010).\n Mining multi-label data. In Data mining and knowledge discovery\n handbook (pp. 667-685). Springer US." - }, - { - "name": "label_ranking_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True binary labels in binary indicator format." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates of the positive class, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Ranking loss measure.\n\nCompute the average number of label pairs that are incorrectly ordered\ngiven y_score weighted by the size of the label set and the number of\nlabels not in the label set.\n\nThis is similar to the error set size, but weighted by the number of\nrelevant and irrelevant labels. The best performance is achieved with\na ranking loss of zero.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n A function *label_ranking_loss*\n\nParameters\n----------\ny_true : {ndarray, sparse matrix} of shape (n_samples, n_labels)\n True binary labels in binary indicator format.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nloss : float\n\nReferences\n----------\n.. 
[1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010).\n Mining multi-label data. In Data mining and knowledge discovery\n handbook (pp. 667-685). Springer US." - }, - { - "name": "_dcg_sample_scores", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True targets of multilabel classification, or true scores of entities to be ranked." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." - }, - { - "name": "k", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Only consider the highest k scores in the ranking. If None, use all outputs." - }, - { - "name": "log_base", - "type": "float", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Base of the logarithm used for the discount. A low value means a sharper discount (top results are more important)." - }, - { - "name": "ignore_ties", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Assume that there are no ties in y_score (which is likely to be the case if y_score is continuous) for efficiency gains." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n True targets of multilabel classification, or true scores of entities\n to be ranked.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates, confidence values,\n or non-thresholded measure of decisions (as returned by\n \"decision_function\" on some classifiers).\n\nk : int, default=None\n Only consider the highest k scores in the ranking. If None, use all\n outputs.\n\nlog_base : float, default=2\n Base of the logarithm used for the discount. A low value means a\n sharper discount (top results are more important).\n\nignore_ties : bool, default=False\n Assume that there are no ties in y_score (which is likely to be the\n case if y_score is continuous) for efficiency gains.\n\nReturns\n-------\ndiscounted_cumulative_gain : ndarray of shape (n_samples,)\n The DCG score for each sample.\n\nSee Also\n--------\nndcg_score : The Discounted Cumulative Gain divided by the Ideal Discounted\n Cumulative Gain (the DCG obtained for a perfect ranking), in order to\n have a score between 0 and 1." - }, - { - "name": "_tie_averaged_dcg", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The true relevance scores." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted scores." 
- }, - { - "name": "discount_cumsum", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed cumulative sum of the discounts." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute DCG by averaging over possible permutations of ties.\n\nThe gain (`y_true`) of an index falling inside a tied group (in the order\ninduced by `y_score`) is replaced by the average gain within this group.\nThe discounted gain for a tied group is then the average `y_true` within\nthis group times the sum of discounts of the corresponding ranks.\n\nThis amounts to averaging scores for all possible orderings of the tied\ngroups.\n\n(note in the case of dcg@k the discount is 0 after index k)\n\nParameters\n----------\ny_true : ndarray\n The true relevance scores.\n\ny_score : ndarray\n Predicted scores.\n\ndiscount_cumsum : ndarray\n Precomputed cumulative sum of the discounts.\n\nReturns\n-------\ndiscounted_cumulative_gain : float\n The discounted cumulative gain.\n\nReferences\n----------\nMcSherry, F., & Najork, M. (2008, March). Computing information retrieval\nperformance measures efficiently in the presence of tied scores. In\nEuropean conference on information retrieval (pp. 414-421). Springer,\nBerlin, Heidelberg." - }, - { - "name": "_check_dcg_target_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "dcg_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True targets of multilabel classification, or true scores of entities to be ranked." 
- }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." - }, - { - "name": "k", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Only consider the highest k scores in the ranking. If None, use all outputs." - }, - { - "name": "log_base", - "type": "float", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Base of the logarithm used for the discount. A low value means a sharper discount (top results are more important)." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, all samples are given the same weight." - }, - { - "name": "ignore_ties", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Assume that there are no ties in y_score (which is likely to be the case if y_score is continuous) for efficiency gains." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.\n\nUsually the Normalized Discounted Cumulative Gain (NDCG, computed by\nndcg_score) is preferred.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n True targets of multilabel classification, or true scores of entities\n to be ranked.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates, confidence values,\n or non-thresholded measure of decisions (as returned by\n \"decision_function\" on some classifiers).\n\nk : int, default=None\n Only consider the highest k scores in the ranking. If None, use all\n outputs.\n\nlog_base : float, default=2\n Base of the logarithm used for the discount. A low value means a\n sharper discount (top results are more important).\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights. If None, all samples are given the same weight.\n\nignore_ties : bool, default=False\n Assume that there are no ties in y_score (which is likely to be the\n case if y_score is continuous) for efficiency gains.\n\nReturns\n-------\ndiscounted_cumulative_gain : float\n The averaged sample DCG scores.\n\nSee Also\n--------\nndcg_score : The Discounted Cumulative Gain divided by the Ideal Discounted\n Cumulative Gain (the DCG obtained for a perfect ranking), in order to\n have a score between 0 and 1.\n\nReferences\n----------\n`Wikipedia entry for Discounted Cumulative Gain\n`_.\n\nJarvelin, K., & Kekalainen, J. (2002).\nCumulated gain-based evaluation of IR techniques. ACM Transactions on\nInformation Systems (TOIS), 20(4), 422-446.\n\nWang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. 
(2013, May).\nA theoretical analysis of NDCG ranking measures. In Proceedings of the 26th\nAnnual Conference on Learning Theory (COLT 2013).\n\nMcSherry, F., & Najork, M. (2008, March). Computing information retrieval\nperformance measures efficiently in the presence of tied scores. In\nEuropean conference on information retrieval (pp. 414-421). Springer,\nBerlin, Heidelberg.\n\nExamples\n--------\n>>> from sklearn.metrics import dcg_score\n>>> # we have groud-truth relevance of some answers to a query:\n>>> true_relevance = np.asarray([[10, 0, 0, 1, 5]])\n>>> # we predict scores for the answers\n>>> scores = np.asarray([[.1, .2, .3, 4, 70]])\n>>> dcg_score(true_relevance, scores)\n9.49...\n>>> # we can set k to truncate the sum; only top k answers contribute\n>>> dcg_score(true_relevance, scores, k=2)\n5.63...\n>>> # now we have some ties in our prediction\n>>> scores = np.asarray([[1, 0, 0, 0, 1]])\n>>> # by default ties are averaged, so here we get the average true\n>>> # relevance of our top predictions: (10 + 5) / 2 = 7.5\n>>> dcg_score(true_relevance, scores, k=1)\n7.5\n>>> # we can choose to ignore ties for faster results, but only\n>>> # if we know there aren't ties in our scores, otherwise we get\n>>> # wrong results:\n>>> dcg_score(true_relevance,\n... scores, k=1, ignore_ties=True)\n5.0" - }, - { - "name": "_ndcg_sample_scores", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True targets of multilabel classification, or true scores of entities to be ranked." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." 
- }, - { - "name": "k", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Only consider the highest k scores in the ranking. If None, use all outputs." - }, - { - "name": "ignore_ties", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Assume that there are no ties in y_score (which is likely to be the case if y_score is continuous) for efficiency gains." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Normalized Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount. Then divide by the best possible\nscore (Ideal DCG, obtained for a perfect ranking) to obtain a score between\n0 and 1.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n True targets of multilabel classification, or true scores of entities\n to be ranked.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates, confidence values,\n or non-thresholded measure of decisions (as returned by\n \"decision_function\" on some classifiers).\n\nk : int, default=None\n Only consider the highest k scores in the ranking. If None, use all\n outputs.\n\nignore_ties : bool, default=False\n Assume that there are no ties in y_score (which is likely to be the\n case if y_score is continuous) for efficiency gains.\n\nReturns\n-------\nnormalized_discounted_cumulative_gain : ndarray of shape (n_samples,)\n The NDCG score for each sample (float in [0., 1.]).\n\nSee Also\n--------\ndcg_score : Discounted Cumulative Gain (not normalized)." 
- }, - { - "name": "ndcg_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True targets of multilabel classification, or true scores of entities to be ranked." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." - }, - { - "name": "k", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Only consider the highest k scores in the ranking. If None, use all outputs." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, all samples are given the same weight." - }, - { - "name": "ignore_ties", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Assume that there are no ties in y_score (which is likely to be the case if y_score is continuous) for efficiency gains." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute Normalized Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount. 
Then divide by the best possible\nscore (Ideal DCG, obtained for a perfect ranking) to obtain a score between\n0 and 1.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n True targets of multilabel classification, or true scores of entities\n to be ranked.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates, confidence values,\n or non-thresholded measure of decisions (as returned by\n \"decision_function\" on some classifiers).\n\nk : int, default=None\n Only consider the highest k scores in the ranking. If None, use all\n outputs.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights. If None, all samples are given the same weight.\n\nignore_ties : bool, default=False\n Assume that there are no ties in y_score (which is likely to be the\n case if y_score is continuous) for efficiency gains.\n\nReturns\n-------\nnormalized_discounted_cumulative_gain : float in [0., 1.]\n The averaged NDCG scores for all samples.\n\nSee Also\n--------\ndcg_score : Discounted Cumulative Gain (not normalized).\n\nReferences\n----------\n`Wikipedia entry for Discounted Cumulative Gain\n`_\n\nJarvelin, K., & Kekalainen, J. (2002).\nCumulated gain-based evaluation of IR techniques. ACM Transactions on\nInformation Systems (TOIS), 20(4), 422-446.\n\nWang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. (2013, May).\nA theoretical analysis of NDCG ranking measures. In Proceedings of the 26th\nAnnual Conference on Learning Theory (COLT 2013)\n\nMcSherry, F., & Najork, M. (2008, March). Computing information retrieval\nperformance measures efficiently in the presence of tied scores. In\nEuropean conference on information retrieval (pp. 414-421). 
Springer,\nBerlin, Heidelberg.\n\nExamples\n--------\n>>> from sklearn.metrics import ndcg_score\n>>> # we have groud-truth relevance of some answers to a query:\n>>> true_relevance = np.asarray([[10, 0, 0, 1, 5]])\n>>> # we predict some scores (relevance) for the answers\n>>> scores = np.asarray([[.1, .2, .3, 4, 70]])\n>>> ndcg_score(true_relevance, scores)\n0.69...\n>>> scores = np.asarray([[.05, 1.1, 1., .5, .0]])\n>>> ndcg_score(true_relevance, scores)\n0.49...\n>>> # we can set k to truncate the sum; only top k answers contribute.\n>>> ndcg_score(true_relevance, scores, k=4)\n0.35...\n>>> # the normalization takes k into account so a perfect answer\n>>> # would still get 1.0\n>>> ndcg_score(true_relevance, true_relevance, k=4)\n1.0\n>>> # now we have some ties in our prediction\n>>> scores = np.asarray([[1, 0, 0, 0, 1]])\n>>> # by default ties are averaged, so here we get the average (normalized)\n>>> # true relevance of our top predictions: (10 / 10 + 5 / 10) / 2 = .75\n>>> ndcg_score(true_relevance, scores, k=1)\n0.75\n>>> # we can choose to ignore ties for faster results, but only\n>>> # if we know there aren't ties in our scores, otherwise we get\n>>> # wrong results:\n>>> ndcg_score(true_relevance,\n... scores, k=1, ignore_ties=True)\n0.5" - }, - { - "name": "top_k_accuracy_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "y_score", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores. These can be either probability estimates or non-thresholded decision values (as returned by :term:`decision_function` on some classifiers). The binary case expects scores with shape (n_samples,) while the multiclass case expects scores with shape (n_samples, n_classes). 
In the nulticlass case, the order of the class scores must correspond to the order of ``labels``, if provided, or else to the numerical or lexicographical order of the labels in ``y_true``." - }, - { - "name": "k", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of most likely outcomes considered to find the correct label." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If `True`, return the fraction of correctly classified samples. Otherwise, return the number of correctly classified samples." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If `None`, all samples are given the same weight." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multiclass only. List of labels that index the classes in ``y_score``. If ``None``, the numerical or lexicographical order of the labels in ``y_true`` is used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Top-k Accuracy classification score.\n\nThis metric computes the number of times where the correct label is among\nthe top `k` labels predicted (ranked by predicted scores). Note that the\nmultilabel case isn't covered here.\n\nRead more in the :ref:`User Guide `\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n True labels.\n\ny_score : array-like of shape (n_samples,) or (n_samples, n_classes)\n Target scores. These can be either probability estimates or\n non-thresholded decision values (as returned by\n :term:`decision_function` on some classifiers). 
The binary case expects\n scores with shape (n_samples,) while the multiclass case expects scores\n with shape (n_samples, n_classes). In the nulticlass case, the order of\n the class scores must correspond to the order of ``labels``, if\n provided, or else to the numerical or lexicographical order of the\n labels in ``y_true``.\n\nk : int, default=2\n Number of most likely outcomes considered to find the correct label.\n\nnormalize : bool, default=True\n If `True`, return the fraction of correctly classified samples.\n Otherwise, return the number of correctly classified samples.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If `None`, all samples are given the same weight.\n\nlabels : array-like of shape (n_classes,), default=None\n Multiclass only. List of labels that index the classes in ``y_score``.\n If ``None``, the numerical or lexicographical order of the labels in\n ``y_true`` is used.\n\nReturns\n-------\nscore : float\n The top-k accuracy score. The best performance is 1 with\n `normalize == True` and the number of samples with\n `normalize == False`.\n\nSee also\n--------\naccuracy_score\n\nNotes\n-----\nIn cases where two or more labels are assigned equal predicted scores,\nthe labels with the highest indices will be chosen first. This might\nimpact the result if the correct label falls after the threshold because\nof that.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import top_k_accuracy_score\n>>> y_true = np.array([0, 1, 2, 2])\n>>> y_score = np.array([[0.5, 0.2, 0.2], # 0 is in top 2\n... [0.3, 0.4, 0.2], # 1 is in top 2\n... [0.2, 0.4, 0.3], # 2 is in top 2\n... 
[0.7, 0.2, 0.1]]) # 2 isn't in top 2\n>>> top_k_accuracy_score(y_true, y_score, k=2)\n0.75\n>>> # Not normalizing gives the number of \"correctly\" classified samples\n>>> top_k_accuracy_score(y_true, y_score, k=2, normalize=False)\n3" - } - ] - }, - { - "name": "sklearn.metrics._regression", - "imports": [ - "import numpy as np", - "import warnings", - "from _loss.glm_distribution import TweedieDistribution", - "from utils.validation import check_array", - "from utils.validation import check_consistent_length", - "from utils.validation import _num_samples", - "from utils.validation import column_or_1d", - "from utils.validation import _deprecate_positional_args", - "from utils.validation import _check_sample_weight", - "from utils.stats import _weighted_percentile", - "from exceptions import UndefinedMetricWarning" - ], - "classes": [], - "functions": [ - { - "name": "_check_reg_targets", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "multioutput", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "'variance_weighted'] or None None is accepted due to backward compatibility of r2_score()." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that y_true and y_pred belong to the same regression task.\n\nParameters\n----------\ny_true : array-like\n\ny_pred : array-like\n\nmultioutput : array-like or string in ['raw_values', uniform_average',\n 'variance_weighted'] or None\n None is accepted due to backward compatibility of r2_score().\n\nReturns\n-------\ntype_true : one of {'continuous', continuous-multioutput'}\n The type of the true target data, as output by\n 'utils.multiclass.type_of_target'.\n\ny_true : array-like of shape (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples, n_outputs)\n Estimated target values.\n\nmultioutput : array-like of shape (n_outputs) or string in ['raw_values',\n uniform_average', 'variance_weighted'] or None\n Custom output weights if ``multioutput`` is array-like or\n just the corresponding argument if ``multioutput`` is a\n correct keyword.\n\ndtype : str or list, default=\"numeric\"\n the dtype argument passed to check_array." - }, - { - "name": "mean_absolute_error", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "multioutput", - "type": "Literal['raw_values', 'uniform_average']", - "hasDefault": true, - "default": "'uniform_average'", - "limitation": null, - "ignored": false, - "docstring": "Defines aggregating of multiple output values. 
Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mean absolute error regression loss.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average'} or array-like of shape (n_outputs,), default='uniform_average'\n Defines aggregating of multiple output values.\n Array-like value defines weights used to average errors.\n\n 'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\n\nReturns\n-------\nloss : float or ndarray of floats\n If multioutput is 'raw_values', then mean absolute error is returned\n for each output separately.\n If multioutput is 'uniform_average' or an ndarray of weights, then the\n weighted average of all output errors is returned.\n\n MAE output is non-negative floating point. The best value is 0.0.\n\nExamples\n--------\n>>> from sklearn.metrics import mean_absolute_error\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> mean_absolute_error(y_true, y_pred)\n0.5\n>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n>>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n>>> mean_absolute_error(y_true, y_pred)\n0.75\n>>> mean_absolute_error(y_true, y_pred, multioutput='raw_values')\narray([0.5, 1. ])\n>>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])\n0.85..." 
- }, - { - "name": "mean_absolute_percentage_error", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "multioutput", - "type": "Literal['raw_values', 'uniform_average']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Defines aggregating of multiple output values. Array-like value defines weights used to average errors. If input is list then the shape must be (n_outputs,). 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mean absolute percentage error regression loss.\n\nNote here that we do not represent the output as a percentage in range\n[0, 100]. Instead, we represent it in range [0, 1/eps]. Read more in the\n:ref:`User Guide `.\n\n.. 
versionadded:: 0.24\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average'} or array-like\n Defines aggregating of multiple output values.\n Array-like value defines weights used to average errors.\n If input is list then the shape must be (n_outputs,).\n\n 'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\nReturns\n-------\nloss : float or ndarray of floats in the range [0, 1/eps]\n If multioutput is 'raw_values', then mean absolute percentage error\n is returned for each output separately.\n If multioutput is 'uniform_average' or an ndarray of weights, then the\n weighted average of all output errors is returned.\n\n MAPE output is non-negative floating point. The best value is 0.0.\n But note the fact that bad predictions can lead to arbitarily large\n MAPE values, especially if some y_true values are very close to zero.\n Note that we return a large value instead of `inf` when y_true is zero.\n\nExamples\n--------\n>>> from sklearn.metrics import mean_absolute_percentage_error\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> mean_absolute_percentage_error(y_true, y_pred)\n0.3273...\n>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n>>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n>>> mean_absolute_percentage_error(y_true, y_pred)\n0.5515...\n>>> mean_absolute_percentage_error(y_true, y_pred, multioutput=[0.3, 0.7])\n0.6198..." 
- }, - { - "name": "mean_squared_error", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "multioutput", - "type": "Literal['raw_values', 'uniform_average']", - "hasDefault": true, - "default": "'uniform_average'", - "limitation": null, - "ignored": false, - "docstring": "Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight." - }, - { - "name": "squared", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True returns MSE value, if False returns RMSE value." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mean squared error regression loss.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average'} or array-like of shape (n_outputs,), default='uniform_average'\n Defines aggregating of multiple output values.\n Array-like value defines weights used to average errors.\n\n 'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\nsquared : bool, default=True\n If True returns MSE value, if False returns RMSE value.\n\nReturns\n-------\nloss : float or ndarray of floats\n A non-negative floating point value (the best value is 0.0), or an\n array of floating point values, one for each individual target.\n\nExamples\n--------\n>>> from sklearn.metrics import mean_squared_error\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> mean_squared_error(y_true, y_pred)\n0.375\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> mean_squared_error(y_true, y_pred, squared=False)\n0.612...\n>>> y_true = [[0.5, 1],[-1, 1],[7, -6]]\n>>> y_pred = [[0, 2],[-1, 2],[8, -5]]\n>>> mean_squared_error(y_true, y_pred)\n0.708...\n>>> mean_squared_error(y_true, y_pred, squared=False)\n0.822...\n>>> mean_squared_error(y_true, y_pred, multioutput='raw_values')\narray([0.41666667, 1. ])\n>>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7])\n0.825..." 
- }, - { - "name": "mean_squared_log_error", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "multioutput", - "type": "Literal['raw_values', 'uniform_average']", - "hasDefault": true, - "default": "'uniform_average'", - "limitation": null, - "ignored": false, - "docstring": "Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors when the input is of multioutput format. 'uniform_average' : Errors of all outputs are averaged with uniform weight." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mean squared logarithmic error regression loss.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average'} or array-like of shape (n_outputs,), default='uniform_average'\n\n Defines aggregating of multiple output values.\n Array-like value defines weights used to average errors.\n\n 'raw_values' :\n Returns a full set of errors when the input is of multioutput\n format.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\nReturns\n-------\nloss : float or ndarray of floats\n A non-negative floating point value (the best value is 0.0), or an\n array of floating point values, one for each individual target.\n\nExamples\n--------\n>>> from sklearn.metrics import mean_squared_log_error\n>>> y_true = [3, 5, 2.5, 7]\n>>> y_pred = [2.5, 5, 4, 8]\n>>> mean_squared_log_error(y_true, y_pred)\n0.039...\n>>> y_true = [[0.5, 1], [1, 2], [7, 6]]\n>>> y_pred = [[0.5, 2], [1, 2.5], [8, 8]]\n>>> mean_squared_log_error(y_true, y_pred)\n0.044...\n>>> mean_squared_log_error(y_true, y_pred, multioutput='raw_values')\narray([0.00462428, 0.08377444])\n>>> mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])\n0.060..." - }, - { - "name": "median_absolute_error", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." 
- }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - }, - { - "name": "multioutput", - "type": "Literal['raw_values', 'uniform_average']", - "hasDefault": true, - "default": "'uniform_average'", - "limitation": null, - "ignored": false, - "docstring": "Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Median absolute error regression loss.\n\nMedian absolute error output is non-negative floating point. The best value\nis 0.0. Read more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape = (n_samples) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)\n Estimated target values.\n\nmultioutput : {'raw_values', 'uniform_average'} or array-like of shape (n_outputs,), default='uniform_average'\n Defines aggregating of multiple output values. Array-like value defines\n weights used to average errors.\n\n 'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. 
versionadded:: 0.24\n\nReturns\n-------\nloss : float or ndarray of floats\n If multioutput is 'raw_values', then mean absolute error is returned\n for each output separately.\n If multioutput is 'uniform_average' or an ndarray of weights, then the\n weighted average of all output errors is returned.\n\nExamples\n--------\n>>> from sklearn.metrics import median_absolute_error\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> median_absolute_error(y_true, y_pred)\n0.5\n>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n>>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n>>> median_absolute_error(y_true, y_pred)\n0.75\n>>> median_absolute_error(y_true, y_pred, multioutput='raw_values')\narray([0.5, 1. ])\n>>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])\n0.85" - }, - { - "name": "explained_variance_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "multioutput", - "type": "Literal['raw_values', 'uniform_average', 'variance_weighted']", - "hasDefault": true, - "default": "'uniform_average'", - "limitation": null, - "ignored": false, - "docstring": "Defines aggregating of multiple output scores. Array-like value defines weights used to average scores. 'raw_values' : Returns a full set of scores in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. 
'variance_weighted' : Scores of all outputs are averaged, weighted by the variances of each individual output." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Explained variance regression score function.\n\nBest possible score is 1.0, lower values are worse.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average', 'variance_weighted'} or array-like of shape (n_outputs,), default='uniform_average'\n Defines aggregating of multiple output scores.\n Array-like value defines weights used to average scores.\n\n 'raw_values' :\n Returns a full set of scores in case of multioutput input.\n\n 'uniform_average' :\n Scores of all outputs are averaged with uniform weight.\n\n 'variance_weighted' :\n Scores of all outputs are averaged, weighted by the variances\n of each individual output.\n\nReturns\n-------\nscore : float or ndarray of floats\n The explained variance or ndarray if 'multioutput' is 'raw_values'.\n\nNotes\n-----\nThis is not a symmetric function.\n\nExamples\n--------\n>>> from sklearn.metrics import explained_variance_score\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> explained_variance_score(y_true, y_pred)\n0.957...\n>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n>>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n>>> explained_variance_score(y_true, y_pred, multioutput='uniform_average')\n0.983..." - }, - { - "name": "r2_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." 
- }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "multioutput", - "type": "Literal['raw_values', 'uniform_average', 'variance_weighted']", - "hasDefault": true, - "default": "'uniform_average'", - "limitation": null, - "ignored": false, - "docstring": "Defines aggregating of multiple output scores. Array-like value defines weights used to average scores. Default is \"uniform_average\". 'raw_values' : Returns a full set of scores in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. 'variance_weighted' : Scores of all outputs are averaged, weighted by the variances of each individual output. .. versionchanged:: 0.19 Default value of multioutput is 'uniform_average'." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "R^2 (coefficient of determination) regression score function.\n\nBest possible score is 1.0 and it can be negative (because the\nmodel can be arbitrarily worse). 
A constant model that always\npredicts the expected value of y, disregarding the input features,\nwould get a R^2 score of 0.0.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average', 'variance_weighted'}, array-like of shape (n_outputs,) or None, default='uniform_average'\n\n Defines aggregating of multiple output scores.\n Array-like value defines weights used to average scores.\n Default is \"uniform_average\".\n\n 'raw_values' :\n Returns a full set of scores in case of multioutput input.\n\n 'uniform_average' :\n Scores of all outputs are averaged with uniform weight.\n\n 'variance_weighted' :\n Scores of all outputs are averaged, weighted by the variances\n of each individual output.\n\n .. versionchanged:: 0.19\n Default value of multioutput is 'uniform_average'.\n\nReturns\n-------\nz : float or ndarray of floats\n The R^2 score or ndarray of scores if 'multioutput' is\n 'raw_values'.\n\nNotes\n-----\nThis is not a symmetric function.\n\nUnlike most other scores, R^2 score may be negative (it need not actually\nbe the square of a quantity R).\n\nThis metric is not well-defined for single samples and will return a NaN\nvalue if n_samples is less than two.\n\nReferences\n----------\n.. [1] `Wikipedia entry on the Coefficient of determination\n `_\n\nExamples\n--------\n>>> from sklearn.metrics import r2_score\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> r2_score(y_true, y_pred)\n0.948...\n>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n>>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n>>> r2_score(y_true, y_pred,\n... 
multioutput='variance_weighted')\n0.938...\n>>> y_true = [1, 2, 3]\n>>> y_pred = [1, 2, 3]\n>>> r2_score(y_true, y_pred)\n1.0\n>>> y_true = [1, 2, 3]\n>>> y_pred = [2, 2, 2]\n>>> r2_score(y_true, y_pred)\n0.0\n>>> y_true = [1, 2, 3]\n>>> y_pred = [3, 2, 1]\n>>> r2_score(y_true, y_pred)\n-3.0" - }, - { - "name": "max_error", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "max_error metric calculates the maximum residual error.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,)\n Estimated target values.\n\nReturns\n-------\nmax_error : float\n A positive floating point value (the best value is 0.0).\n\nExamples\n--------\n>>> from sklearn.metrics import max_error\n>>> y_true = [3, 2, 7, 1]\n>>> y_pred = [4, 2, 7, 1]\n>>> max_error(y_true, y_pred)\n1" - }, - { - "name": "mean_tweedie_deviance", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- }, - { - "name": "power", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Tweedie power parameter. Either power <= 0 or power >= 1. The higher `p` the less weight is given to extreme deviations between true and predicted targets. - power < 0: Extreme stable distribution. Requires: y_pred > 0. - power = 0 : Normal distribution, output corresponds to mean_squared_error. y_true and y_pred can be any real numbers. - power = 1 : Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0. - power = 3 : Inverse Gaussian distribution. Requires: y_true > 0 and y_pred > 0. - otherwise : Positive stable distribution. Requires: y_true > 0 and y_pred > 0." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mean Tweedie deviance regression loss.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\npower : float, default=0\n Tweedie power parameter. Either power <= 0 or power >= 1.\n\n The higher `p` the less weight is given to extreme\n deviations between true and predicted targets.\n\n - power < 0: Extreme stable distribution. Requires: y_pred > 0.\n - power = 0 : Normal distribution, output corresponds to\n mean_squared_error. y_true and y_pred can be any real numbers.\n - power = 1 : Poisson distribution. Requires: y_true >= 0 and\n y_pred > 0.\n - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0\n and y_pred > 0.\n - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.\n - power = 3 : Inverse Gaussian distribution. 
Requires: y_true > 0\n and y_pred > 0.\n - otherwise : Positive stable distribution. Requires: y_true > 0\n and y_pred > 0.\n\nReturns\n-------\nloss : float\n A non-negative floating point value (the best value is 0.0).\n\nExamples\n--------\n>>> from sklearn.metrics import mean_tweedie_deviance\n>>> y_true = [2, 0, 1, 4]\n>>> y_pred = [0.5, 0.5, 2., 2.]\n>>> mean_tweedie_deviance(y_true, y_pred, power=1)\n1.4260..." - }, - { - "name": "mean_poisson_deviance", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values. Requires y_true >= 0." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values. Requires y_pred > 0." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mean Poisson deviance regression loss.\n\nPoisson deviance is equivalent to the Tweedie deviance with\nthe power parameter `power=1`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n Ground truth (correct) target values. Requires y_true >= 0.\n\ny_pred : array-like of shape (n_samples,)\n Estimated target values. Requires y_pred > 0.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nloss : float\n A non-negative floating point value (the best value is 0.0).\n\nExamples\n--------\n>>> from sklearn.metrics import mean_poisson_deviance\n>>> y_true = [2, 0, 1, 4]\n>>> y_pred = [0.5, 0.5, 2., 2.]\n>>> mean_poisson_deviance(y_true, y_pred)\n1.4260..." 
- }, - { - "name": "mean_gamma_deviance", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values. Requires y_true > 0." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values. Requires y_pred > 0." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mean Gamma deviance regression loss.\n\nGamma deviance is equivalent to the Tweedie deviance with\nthe power parameter `power=2`. It is invariant to scaling of\nthe target variable, and measures relative errors.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n Ground truth (correct) target values. Requires y_true > 0.\n\ny_pred : array-like of shape (n_samples,)\n Estimated target values. Requires y_pred > 0.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nloss : float\n A non-negative floating point value (the best value is 0.0).\n\nExamples\n--------\n>>> from sklearn.metrics import mean_gamma_deviance\n>>> y_true = [2, 0.5, 1, 4]\n>>> y_pred = [0.5, 0.5, 2., 2.]\n>>> mean_gamma_deviance(y_true, y_pred)\n1.0568..." 
- } - ] - }, - { - "name": "sklearn.metrics._scorer", - "imports": [ - "from collections.abc import Iterable", - "from functools import partial", - "from collections import Counter", - "import numpy as np", - "from None import r2_score", - "from None import median_absolute_error", - "from None import max_error", - "from None import mean_absolute_error", - "from None import mean_squared_error", - "from None import mean_squared_log_error", - "from None import mean_poisson_deviance", - "from None import mean_gamma_deviance", - "from None import accuracy_score", - "from None import top_k_accuracy_score", - "from None import f1_score", - "from None import roc_auc_score", - "from None import average_precision_score", - "from None import precision_score", - "from None import recall_score", - "from None import log_loss", - "from None import balanced_accuracy_score", - "from None import explained_variance_score", - "from None import brier_score_loss", - "from None import jaccard_score", - "from None import mean_absolute_percentage_error", - "from cluster import adjusted_rand_score", - "from cluster import rand_score", - "from cluster import homogeneity_score", - "from cluster import completeness_score", - "from cluster import v_measure_score", - "from cluster import mutual_info_score", - "from cluster import adjusted_mutual_info_score", - "from cluster import normalized_mutual_info_score", - "from cluster import fowlkes_mallows_score", - "from utils.multiclass import type_of_target", - "from utils.validation import _deprecate_positional_args", - "from base import is_regressor" - ], - "classes": [ - { - "name": "_MultimetricScorer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "scorers", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary mapping names to callable scorers." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate predicted target values." - }, - { - "name": "_use_cache", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return True if using a cache is beneficial.\n\nCaching may be beneficial when one of these conditions holds:\n - `_ProbaScorer` will be called twice.\n - `_PredictScorer` will be called twice.\n - `_ThresholdScorer` will be called twice.\n - `_ThresholdScorer` and `_PredictScorer` are called and\n estimator is a regressor.\n - `_ThresholdScorer` and `_ProbaScorer` are called and\n estimator does not have a `decision_function` attribute." - } - ], - "docstring": "Callable for multimetric scoring used to avoid repeated calls\nto `predict_proba`, `predict`, and `decision_function`.\n\n`_MultimetricScorer` will return a dictionary of scores corresponding to\nthe scorers in the dictionary. Note that `_MultimetricScorer` can be\ncreated with a dictionary with one key (i.e. only one actual scorer).\n\nParameters\n----------\nscorers : dict\n Dictionary mapping names to callable scorers." - }, - { - "name": "_BaseScorer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_select_proba_binary", - "decorators": [], - "parameters": [ - { - "name": "y_pred", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The prediction given by `predict_proba`." 
- }, - { - "name": "classes", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class labels for the estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Select the column of the positive label in `y_pred` when\nprobabilities are provided.\n\nParameters\n----------\ny_pred : ndarray of shape (n_samples, n_classes)\n The prediction given by `predict_proba`.\n\nclasses : ndarray of shape (n_classes,)\n The class labels for the estimator.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n Probability predictions of the positive class." - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Trained estimator to use for scoring. Must have a predict_proba method; the output of that is used to compute the score." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data that will be fed to estimator.predict." - }, - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gold standard target values for X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate predicted target values for X relative to y_true.\n\nParameters\n----------\nestimator : object\n Trained estimator to use for scoring. 
Must have a predict_proba\n method; the output of that is used to compute the score.\n\nX : {array-like, sparse matrix}\n Test data that will be fed to estimator.predict.\n\ny_true : array-like\n Gold standard target values for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Score function applied to prediction of estimator on X." - }, - { - "name": "_factory_args", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return non-default make_scorer arguments for repr." - } - ], - "docstring": null - }, - { - "name": "_PredictScorer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_score", - "decorators": [], - "parameters": [ - { - "name": "method_caller", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Returns predictions given an estimator, method name, and other arguments, potentially caching results." - }, - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Trained estimator to use for scoring. Must have a predict_proba method; the output of that is used to compute the score." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data that will be fed to estimator.predict." - }, - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gold standard target values for X." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate predicted target values for X relative to y_true.\n\nParameters\n----------\nmethod_caller : callable\n Returns predictions given an estimator, method name, and other\n arguments, potentially caching results.\n\nestimator : object\n Trained estimator to use for scoring. Must have a predict_proba\n method; the output of that is used to compute the score.\n\nX : {array-like, sparse matrix}\n Test data that will be fed to estimator.predict.\n\ny_true : array-like\n Gold standard target values for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Score function applied to prediction of estimator on X." - } - ], - "docstring": null - }, - { - "name": "_ProbaScorer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_score", - "decorators": [], - "parameters": [ - { - "name": "method_caller", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Returns predictions given an estimator, method name, and other arguments, potentially caching results." - }, - { - "name": "clf", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Trained classifier to use for scoring. Must have a predict_proba method; the output of that is used to compute the score." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data that will be fed to clf.predict_proba." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gold standard target values for X. These must be class labels, not probabilities." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate predicted probabilities for X relative to y_true.\n\nParameters\n----------\nmethod_caller : callable\n Returns predictions given an estimator, method name, and other\n arguments, potentially caching results.\n\nclf : object\n Trained classifier to use for scoring. Must have a predict_proba\n method; the output of that is used to compute the score.\n\nX : {array-like, sparse matrix}\n Test data that will be fed to clf.predict_proba.\n\ny : array-like\n Gold standard target values for X. These must be class labels,\n not probabilities.\n\nsample_weight : array-like, default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Score function applied to prediction of estimator on X." - }, - { - "name": "_factory_args", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "_ThresholdScorer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_score", - "decorators": [], - "parameters": [ - { - "name": "method_caller", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Returns predictions given an estimator, method name, and other arguments, potentially caching results." 
- }, - { - "name": "clf", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Trained classifier to use for scoring. Must have either a decision_function method or a predict_proba method; the output of that is used to compute the score." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data that will be fed to clf.decision_function or clf.predict_proba." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gold standard target values for X. These must be class labels, not decision function values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate decision function output for X relative to y_true.\n\nParameters\n----------\nmethod_caller : callable\n Returns predictions given an estimator, method name, and other\n arguments, potentially caching results.\n\nclf : object\n Trained classifier to use for scoring. Must have either a\n decision_function method or a predict_proba method; the output of\n that is used to compute the score.\n\nX : {array-like, sparse matrix}\n Test data that will be fed to clf.decision_function or\n clf.predict_proba.\n\ny : array-like\n Gold standard target values for X. These must be class labels,\n not decision function values.\n\nsample_weight : array-like, default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Score function applied to prediction of estimator on X." 
- }, - { - "name": "_factory_args", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "_cached_call", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call estimator with method and args and kwargs." - }, - { - "name": "get_scorer", - "decorators": [], - "parameters": [ - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Scoring method as string. If callable it is returned as is." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get a scorer from string.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscoring : str or callable\n Scoring method as string. If callable it is returned as is.\n\nReturns\n-------\nscorer : callable\n The scorer." - }, - { - "name": "_passthrough_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Function that wraps estimator.score" - }, - { - "name": "check_scoring", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The object to use to fit the data." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``." - }, - { - "name": "allow_none", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If no scoring is specified and the estimator has no score function, we can either return None or raise an exception." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Determine scorer from user options.\n\nA TypeError will be thrown if the estimator cannot be scored.\n\nParameters\n----------\nestimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\nscoring : str or callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\nallow_none : bool, default=False\n If no scoring is specified and the estimator has no score function, we\n can either return None or raise an exception.\n\nReturns\n-------\nscoring : callable\n A scorer callable object / function with signature\n ``scorer(estimator, X, y)``." - }, - { - "name": "_check_multimetric_scoring", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator for which the scoring will be applied." - }, - { - "name": "scoring", - "type": "Union[List, Tuple[], Dict]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. For evaluating multiple metrics, either give a list of (unique) strings or a dict with names as keys and callables as values. See :ref:`multimetric_grid_search` for an example." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the scoring parameter in cases when multiple metrics are allowed.\n\nParameters\n----------\nestimator : sklearn estimator instance\n The estimator for which the scoring will be applied.\n\nscoring : list, tuple or dict\n A single string (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n For evaluating multiple metrics, either give a list of (unique) strings\n or a dict with names as keys and callables as values.\n\n See :ref:`multimetric_grid_search` for an example.\n\nReturns\n-------\nscorers_dict : dict\n A dict mapping each scorer name to its validated scorer." - }, - { - "name": "make_scorer", - "decorators": [], - "parameters": [ - { - "name": "score_func", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Score function (or loss function) with signature ``score_func(y, y_pred, **kwargs)``." - }, - { - "name": "greater_is_better", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether score_func is a score function (default), meaning high is good, or a loss function, meaning low is good. In the latter case, the scorer object will sign-flip the outcome of the score_func." - }, - { - "name": "needs_proba", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether score_func requires predict_proba to get probability estimates out of a classifier. If True, for binary `y_true`, the score function is supposed to accept a 1D `y_pred` (i.e., probability of the positive class, shape `(n_samples,)`)." - }, - { - "name": "needs_threshold", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether score_func takes a continuous decision certainty. 
This only works for binary classification using estimators that have either a decision_function or predict_proba method. If True, for binary `y_true`, the score function is supposed to accept a 1D `y_pred` (i.e., probability of the positive class or the decision function, shape `(n_samples,)`). For example ``average_precision`` or the area under the roc curve can not be computed using discrete predictions alone." - }, - { - "name": "**kwargs", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional parameters to be passed to score_func." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make a scorer from a performance metric or loss function.\n\nThis factory function wraps scoring functions for use in\n:class:`~sklearn.model_selection.GridSearchCV` and\n:func:`~sklearn.model_selection.cross_val_score`.\nIt takes a score function, such as :func:`~sklearn.metrics.accuracy_score`,\n:func:`~sklearn.metrics.mean_squared_error`,\n:func:`~sklearn.metrics.adjusted_rand_index` or\n:func:`~sklearn.metrics.average_precision`\nand returns a callable that scores an estimator's output.\nThe signature of the call is `(estimator, X, y)` where `estimator`\nis the model to be evaluated, `X` is the data and `y` is the\nground truth labeling (or `None` in the case of unsupervised models).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable\n Score function (or loss function) with signature\n ``score_func(y, y_pred, **kwargs)``.\n\ngreater_is_better : bool, default=True\n Whether score_func is a score function (default), meaning high is good,\n or a loss function, meaning low is good. 
In the latter case, the\n scorer object will sign-flip the outcome of the score_func.\n\nneeds_proba : bool, default=False\n Whether score_func requires predict_proba to get probability estimates\n out of a classifier.\n\n If True, for binary `y_true`, the score function is supposed to accept\n a 1D `y_pred` (i.e., probability of the positive class, shape\n `(n_samples,)`).\n\nneeds_threshold : bool, default=False\n Whether score_func takes a continuous decision certainty.\n This only works for binary classification using estimators that\n have either a decision_function or predict_proba method.\n\n If True, for binary `y_true`, the score function is supposed to accept\n a 1D `y_pred` (i.e., probability of the positive class or the decision\n function, shape `(n_samples,)`).\n\n For example ``average_precision`` or the area under the roc curve\n can not be computed using discrete predictions alone.\n\n**kwargs : additional arguments\n Additional parameters to be passed to score_func.\n\nReturns\n-------\nscorer : callable\n Callable object that returns a scalar score; greater is better.\n\nExamples\n--------\n>>> from sklearn.metrics import fbeta_score, make_scorer\n>>> ftwo_scorer = make_scorer(fbeta_score, beta=2)\n>>> ftwo_scorer\nmake_scorer(fbeta_score, beta=2)\n>>> from sklearn.model_selection import GridSearchCV\n>>> from sklearn.svm import LinearSVC\n>>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]},\n... scoring=ftwo_scorer)\n\nNotes\n-----\nIf `needs_proba=False` and `needs_threshold=False`, the score\nfunction is supposed to accept the output of :term:`predict`. If\n`needs_proba=True`, the score function is supposed to accept the\noutput of :term:`predict_proba` (For binary `y_true`, the score function is\nsupposed to accept probability of the positive class). If\n`needs_threshold=True`, the score function is supposed to accept the\noutput of :term:`decision_function`." 
- } - ] - }, - { - "name": "sklearn.metrics", - "imports": [ - "from _ranking import auc", - "from _ranking import average_precision_score", - "from _ranking import coverage_error", - "from _ranking import det_curve", - "from _ranking import dcg_score", - "from _ranking import label_ranking_average_precision_score", - "from _ranking import label_ranking_loss", - "from _ranking import ndcg_score", - "from _ranking import precision_recall_curve", - "from _ranking import roc_auc_score", - "from _ranking import roc_curve", - "from _ranking import top_k_accuracy_score", - "from _classification import accuracy_score", - "from _classification import balanced_accuracy_score", - "from _classification import classification_report", - "from _classification import cohen_kappa_score", - "from _classification import confusion_matrix", - "from _classification import f1_score", - "from _classification import fbeta_score", - "from _classification import hamming_loss", - "from _classification import hinge_loss", - "from _classification import jaccard_score", - "from _classification import log_loss", - "from _classification import matthews_corrcoef", - "from _classification import precision_recall_fscore_support", - "from _classification import precision_score", - "from _classification import recall_score", - "from _classification import zero_one_loss", - "from _classification import brier_score_loss", - "from _classification import multilabel_confusion_matrix", - "from None import cluster", - "from cluster import adjusted_mutual_info_score", - "from cluster import adjusted_rand_score", - "from cluster import rand_score", - "from cluster import pair_confusion_matrix", - "from cluster import completeness_score", - "from cluster import consensus_score", - "from cluster import homogeneity_completeness_v_measure", - "from cluster import homogeneity_score", - "from cluster import mutual_info_score", - "from cluster import normalized_mutual_info_score", - "from cluster import 
fowlkes_mallows_score", - "from cluster import silhouette_samples", - "from cluster import silhouette_score", - "from cluster import calinski_harabasz_score", - "from cluster import v_measure_score", - "from cluster import davies_bouldin_score", - "from pairwise import euclidean_distances", - "from pairwise import nan_euclidean_distances", - "from pairwise import pairwise_distances", - "from pairwise import pairwise_distances_argmin", - "from pairwise import pairwise_distances_argmin_min", - "from pairwise import pairwise_kernels", - "from pairwise import pairwise_distances_chunked", - "from _regression import explained_variance_score", - "from _regression import max_error", - "from _regression import mean_absolute_error", - "from _regression import mean_squared_error", - "from _regression import mean_squared_log_error", - "from _regression import median_absolute_error", - "from _regression import mean_absolute_percentage_error", - "from _regression import r2_score", - "from _regression import mean_tweedie_deviance", - "from _regression import mean_poisson_deviance", - "from _regression import mean_gamma_deviance", - "from _scorer import check_scoring", - "from _scorer import make_scorer", - "from _scorer import SCORERS", - "from _scorer import get_scorer", - "from _plot.det_curve import plot_det_curve", - "from _plot.det_curve import DetCurveDisplay", - "from _plot.roc_curve import plot_roc_curve", - "from _plot.roc_curve import RocCurveDisplay", - "from _plot.precision_recall_curve import plot_precision_recall_curve", - "from _plot.precision_recall_curve import PrecisionRecallDisplay", - "from _plot.confusion_matrix import plot_confusion_matrix", - "from _plot.confusion_matrix import ConfusionMatrixDisplay" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.metrics.cluster.setup", - "imports": [ - "import os", - "import numpy", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": 
[], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.cluster._bicluster", - "imports": [ - "import numpy as np", - "from scipy.optimize import linear_sum_assignment", - "from utils.validation import check_consistent_length", - "from utils.validation import check_array", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "_check_rows_and_columns", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Unpacks the row and column arrays and checks their shape." - }, - { - "name": "_jaccard", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Jaccard coefficient on the elements of the two biclusters." - }, - { - "name": "_pairwise_similarity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes pairwise similarity matrix.\n\nresult[i, j] is the Jaccard coefficient of a's bicluster i and b's\nbicluster j." - }, - { - "name": "consensus_score", - "decorators": [], - "parameters": [ - { - "name": "a", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Tuple of row and column indicators for a set of biclusters." - }, - { - "name": "b", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Another set of biclusters like ``a``." 
- }, - { - "name": "similarity", - "type": "Union[Callable, Literal['jaccard']]", - "hasDefault": true, - "default": "'jaccard'", - "limitation": null, - "ignored": false, - "docstring": "May be the string \"jaccard\" to use the Jaccard coefficient, or any function that takes four arguments, each of which is a 1d indicator vector: (a_rows, a_columns, b_rows, b_columns)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The similarity of two sets of biclusters.\n\nSimilarity between individual biclusters is computed. Then the\nbest matching between sets is found using the Hungarian algorithm.\nThe final score is the sum of similarities divided by the size of\nthe larger set.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\na : (rows, columns)\n Tuple of row and column indicators for a set of biclusters.\n\nb : (rows, columns)\n Another set of biclusters like ``a``.\n\nsimilarity : 'jaccard' or callable, default='jaccard'\n May be the string \"jaccard\" to use the Jaccard coefficient, or\n any function that takes four arguments, each of which is a 1d\n indicator vector: (a_rows, a_columns, b_rows, b_columns).\n\nReferences\n----------\n\n* Hochreiter, Bodenhofer, et. al., 2010. `FABIA: factor analysis\n for bicluster acquisition\n `__." 
- } - ] - }, - { - "name": "sklearn.metrics.cluster._supervised", - "imports": [ - "import warnings", - "from math import log", - "import numpy as np", - "from scipy import sparse as sp", - "from _expected_mutual_info_fast import expected_mutual_information", - "from utils.fixes import _astype_copy_false", - "from utils.multiclass import type_of_target", - "from utils.validation import _deprecate_positional_args", - "from utils.validation import check_array", - "from utils.validation import check_consistent_length" - ], - "classes": [], - "functions": [ - { - "name": "check_clusterings", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The true labels." - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The predicted labels." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that the labels arrays are 1D and of same dimension.\n\nParameters\n----------\nlabels_true : array-like of shape (n_samples,)\n The true labels.\n\nlabels_pred : array-like of shape (n_samples,)\n The predicted labels." - }, - { - "name": "_generalized_average", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return a particular mean of two numbers." - }, - { - "name": "contingency_matrix", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth class labels to be used as a reference." - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Cluster labels to evaluate." 
- }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If a float, that value is added to all values in the contingency matrix. This helps to stop NaN propagation. If ``None``, nothing is adjusted." - }, - { - "name": "sparse", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If `True`, return a sparse CSR continency matrix. If `eps` is not `None` and `sparse` is `True` will raise ValueError. .. versionadded:: 0.18" - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Output dtype. Ignored if `eps` is not `None`. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Build a contingency matrix describing the relationship between labels.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n Ground truth class labels to be used as a reference.\n\nlabels_pred : array-like of shape (n_samples,)\n Cluster labels to evaluate.\n\neps : float, default=None\n If a float, that value is added to all values in the contingency\n matrix. This helps to stop NaN propagation.\n If ``None``, nothing is adjusted.\n\nsparse : bool, default=False\n If `True`, return a sparse CSR continency matrix. If `eps` is not\n `None` and `sparse` is `True` will raise ValueError.\n\n .. versionadded:: 0.18\n\ndtype : numeric type, default=np.int64\n Output dtype. Ignored if `eps` is not `None`.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ncontingency : {array-like, sparse}, shape=[n_classes_true, n_classes_pred]\n Matrix :math:`C` such that :math:`C_{i, j}` is the number of samples in\n true class :math:`i` and in predicted class :math:`j`. If\n ``eps is None``, the dtype of this array will be integer unless set\n otherwise with the ``dtype`` argument. 
If ``eps`` is given, the dtype\n will be float.\n Will be a ``sklearn.sparse.csr_matrix`` if ``sparse=True``." - }, - { - "name": "pair_confusion_matrix", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth class labels to be used as a reference." - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Cluster labels to evaluate." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Pair confusion matrix arising from two clusterings.\n\nThe pair confusion matrix :math:`C` computes a 2 by 2 similarity matrix\nbetween two clusterings by considering all pairs of samples and counting\npairs that are assigned into the same or into different clusters under\nthe true and predicted clusterings.\n\nConsidering a pair of samples that is clustered together a positive pair,\nthen as in binary classification the count of true negatives is\n:math:`C_{00}`, false negatives is :math:`C_{10}`, true positives is\n:math:`C_{11}` and false positives is :math:`C_{01}`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : array-like of shape (n_samples,), dtype=integral\n Ground truth class labels to be used as a reference.\n\nlabels_pred : array-like of shape (n_samples,), dtype=integral\n Cluster labels to evaluate.\n\nReturns\n-------\nC : ndarray of shape (2, 2), dtype=np.int64\n The contingency matrix.\n\nSee Also\n--------\nrand_score: Rand Score\nadjusted_rand_score: Adjusted Rand Score\nadjusted_mutual_info_score: Adjusted Mutual Information\n\nExamples\n--------\nPerfectly matching labelings have all non-zero entries on the\ndiagonal regardless of actual label values:\n\n >>> from sklearn.metrics.cluster import pair_confusion_matrix\n >>> pair_confusion_matrix([0, 0, 1, 1], [1, 
1, 0, 0])\n array([[8, 0],\n [0, 4]]...\n\nLabelings that assign all classes members to the same clusters\nare complete but may be not always pure, hence penalized, and\nhave some off-diagonal non-zero entries:\n\n >>> pair_confusion_matrix([0, 0, 1, 2], [0, 0, 1, 1])\n array([[8, 2],\n [0, 2]]...\n\nNote that the matrix is not symmetric.\n\nReferences\n----------\n.. L. Hubert and P. Arabie, Comparing Partitions, Journal of\n Classification 1985\n https://link.springer.com/article/10.1007%2FBF01908075" - }, - { - "name": "rand_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth class labels to be used as a reference." - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Cluster labels to evaluate." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Rand index.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is:\n\n RI = (number of agreeing pairs) / (number of pairs)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : array-like of shape (n_samples,), dtype=integral\n Ground truth class labels to be used as a reference.\n\nlabels_pred : array-like of shape (n_samples,), dtype=integral\n Cluster labels to evaluate.\n\nReturns\n-------\nRI : float\n Similarity score between 0.0 and 1.0, inclusive, 1.0 stands for\n perfect match.\n\nSee Also\n--------\nadjusted_rand_score: Adjusted Rand Score\nadjusted_mutual_info_score: Adjusted Mutual Information\n\nExamples\n--------\nPerfectly matching labelings have a score of 1 even\n\n >>> from sklearn.metrics.cluster import 
rand_score\n >>> rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nLabelings that assign all classes members to the same clusters\nare complete but may not always be pure, hence penalized:\n\n >>> rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n 0.83...\n\nReferences\n----------\n.. L. Hubert and P. Arabie, Comparing Partitions, Journal of\n Classification 1985\n https://link.springer.com/article/10.1007%2FBF01908075\n\n.. https://en.wikipedia.org/wiki/Simple_matching_coefficient\n\n.. https://en.wikipedia.org/wiki/Rand_index" - }, - { - "name": "adjusted_rand_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth class labels to be used as a reference" - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Cluster labels to evaluate" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Rand index adjusted for chance.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is then \"adjusted for chance\" into the ARI score\nusing the following scheme::\n\n ARI = (RI - Expected_RI) / (max(RI) - Expected_RI)\n\nThe adjusted Rand index is thus ensured to have a value close to\n0.0 for random labeling independently of the number of clusters and\nsamples and exactly 1.0 when the clusterings are identical (up to\na permutation).\n\nARI is a symmetric measure::\n\n adjusted_rand_score(a, b) == adjusted_rand_score(b, a)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n Ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape 
(n_samples,)\n Cluster labels to evaluate\n\nReturns\n-------\nARI : float\n Similarity score between -1.0 and 1.0. Random labelings have an ARI\n close to 0.0. 1.0 stands for perfect match.\n\nExamples\n--------\nPerfectly matching labelings have a score of 1 even\n\n >>> from sklearn.metrics.cluster import adjusted_rand_score\n >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 1])\n 1.0\n >>> adjusted_rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nLabelings that assign all classes members to the same clusters\nare complete but may not always be pure, hence penalized::\n\n >>> adjusted_rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n 0.57...\n\nARI is symmetric, so labelings that have pure clusters with members\ncoming from the same classes but unnecessary splits are penalized::\n\n >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 2])\n 0.57...\n\nIf classes members are completely split across different clusters, the\nassignment is totally incomplete, hence the ARI is very low::\n\n >>> adjusted_rand_score([0, 0, 0, 0], [0, 1, 2, 3])\n 0.0\n\nReferences\n----------\n.. [Hubert1985] L. Hubert and P. Arabie, Comparing Partitions,\n Journal of Classification 1985\n https://link.springer.com/article/10.1007%2FBF01908075\n\n.. [Steinley2004] D. Steinley, Properties of the Hubert-Arabie\n adjusted Rand index, Psychological Methods 2004\n\n.. [wk] https://en.wikipedia.org/wiki/Rand_index#Adjusted_Rand_index\n\nSee Also\n--------\nadjusted_mutual_info_score : Adjusted Mutual Information." 
- }, - { - "name": "homogeneity_completeness_v_measure", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "ground truth class labels to be used as a reference" - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "cluster labels to evaluate" - }, - { - "name": "beta", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Ratio of weight attributed to ``homogeneity`` vs ``completeness``. If ``beta`` is greater than 1, ``completeness`` is weighted more strongly in the calculation. If ``beta`` is less than 1, ``homogeneity`` is weighted more strongly." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the homogeneity and completeness and V-Measure scores at once.\n\nThose metrics are based on normalized conditional entropy measures of\nthe clustering labeling to evaluate given the knowledge of a Ground\nTruth class labels of the same samples.\n\nA clustering result satisfies homogeneity if all of its clusters\ncontain only data points which are members of a single class.\n\nA clustering result satisfies completeness if all the data points\nthat are members of a given class are elements of the same cluster.\n\nBoth scores have positive values between 0.0 and 1.0, larger values\nbeing desirable.\n\nThose 3 metrics are independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore values in any way.\n\nV-Measure is furthermore symmetric: swapping ``labels_true`` and\n``label_pred`` will give the same score. This does not hold for\nhomogeneity and completeness. 
V-Measure is identical to\n:func:`normalized_mutual_info_score` with the arithmetic averaging\nmethod.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape (n_samples,)\n cluster labels to evaluate\n\nbeta : float, default=1.0\n Ratio of weight attributed to ``homogeneity`` vs ``completeness``.\n If ``beta`` is greater than 1, ``completeness`` is weighted more\n strongly in the calculation. If ``beta`` is less than 1,\n ``homogeneity`` is weighted more strongly.\n\nReturns\n-------\nhomogeneity : float\n score between 0.0 and 1.0. 1.0 stands for perfectly homogeneous labeling\n\ncompleteness : float\n score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling\n\nv_measure : float\n harmonic mean of the first two\n\nSee Also\n--------\nhomogeneity_score\ncompleteness_score\nv_measure_score" - }, - { - "name": "homogeneity_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "ground truth class labels to be used as a reference" - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "cluster labels to evaluate" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Homogeneity metric of a cluster labeling given a ground truth.\n\nA clustering result satisfies homogeneity if all of its clusters\ncontain only data points which are members of a single class.\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is not symmetric: switching ``label_true`` with ``label_pred``\nwill return the :func:`completeness_score` which will be 
different in\ngeneral.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape (n_samples,)\n cluster labels to evaluate\n\nReturns\n-------\nhomogeneity : float\n score between 0.0 and 1.0. 1.0 stands for perfectly homogeneous labeling\n\nReferences\n----------\n\n.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. V-Measure: A\n conditional entropy-based external cluster evaluation measure\n `_\n\nSee Also\n--------\ncompleteness_score\nv_measure_score\n\nExamples\n--------\n\nPerfect labelings are homogeneous::\n\n >>> from sklearn.metrics.cluster import homogeneity_score\n >>> homogeneity_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nNon-perfect labelings that further split classes into more clusters can be\nperfectly homogeneous::\n\n >>> print(\"%.6f\" % homogeneity_score([0, 0, 1, 1], [0, 0, 1, 2]))\n 1.000000\n >>> print(\"%.6f\" % homogeneity_score([0, 0, 1, 1], [0, 1, 2, 3]))\n 1.000000\n\nClusters that include samples from different classes do not make for an\nhomogeneous labeling::\n\n >>> print(\"%.6f\" % homogeneity_score([0, 0, 1, 1], [0, 1, 0, 1]))\n 0.0...\n >>> print(\"%.6f\" % homogeneity_score([0, 0, 1, 1], [0, 0, 0, 0]))\n 0.0..." 
- }, - { - "name": "completeness_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "ground truth class labels to be used as a reference" - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "cluster labels to evaluate" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Completeness metric of a cluster labeling given a ground truth.\n\nA clustering result satisfies completeness if all the data points\nthat are members of a given class are elements of the same cluster.\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is not symmetric: switching ``label_true`` with ``label_pred``\nwill return the :func:`homogeneity_score` which will be different in\ngeneral.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape (n_samples,)\n cluster labels to evaluate\n\nReturns\n-------\ncompleteness : float\n score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling\n\nReferences\n----------\n\n.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. 
V-Measure: A\n conditional entropy-based external cluster evaluation measure\n `_\n\nSee Also\n--------\nhomogeneity_score\nv_measure_score\n\nExamples\n--------\n\nPerfect labelings are complete::\n\n >>> from sklearn.metrics.cluster import completeness_score\n >>> completeness_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nNon-perfect labelings that assign all classes members to the same clusters\nare still complete::\n\n >>> print(completeness_score([0, 0, 1, 1], [0, 0, 0, 0]))\n 1.0\n >>> print(completeness_score([0, 1, 2, 3], [0, 0, 1, 1]))\n 0.999...\n\nIf classes members are split across different clusters, the\nassignment cannot be complete::\n\n >>> print(completeness_score([0, 0, 1, 1], [0, 1, 0, 1]))\n 0.0\n >>> print(completeness_score([0, 0, 0, 0], [0, 1, 2, 3]))\n 0.0" - }, - { - "name": "v_measure_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "ground truth class labels to be used as a reference" - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "cluster labels to evaluate" - }, - { - "name": "beta", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Ratio of weight attributed to ``homogeneity`` vs ``completeness``. If ``beta`` is greater than 1, ``completeness`` is weighted more strongly in the calculation. If ``beta`` is less than 1, ``homogeneity`` is weighted more strongly." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "V-measure cluster labeling given a ground truth.\n\nThis score is identical to :func:`normalized_mutual_info_score` with\nthe ``'arithmetic'`` option for averaging.\n\nThe V-measure is the harmonic mean between homogeneity and completeness::\n\n v = (1 + beta) * homogeneity * completeness\n / (beta * homogeneity + completeness)\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape (n_samples,)\n cluster labels to evaluate\n\nbeta : float, default=1.0\n Ratio of weight attributed to ``homogeneity`` vs ``completeness``.\n If ``beta`` is greater than 1, ``completeness`` is weighted more\n strongly in the calculation. If ``beta`` is less than 1,\n ``homogeneity`` is weighted more strongly.\n\nReturns\n-------\nv_measure : float\n score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling\n\nReferences\n----------\n\n.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. 
V-Measure: A\n conditional entropy-based external cluster evaluation measure\n `_\n\nSee Also\n--------\nhomogeneity_score\ncompleteness_score\nnormalized_mutual_info_score\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have score 1.0::\n\n >>> from sklearn.metrics.cluster import v_measure_score\n >>> v_measure_score([0, 0, 1, 1], [0, 0, 1, 1])\n 1.0\n >>> v_measure_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nLabelings that assign all classes members to the same clusters\nare complete be not homogeneous, hence penalized::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 2], [0, 0, 1, 1]))\n 0.8...\n >>> print(\"%.6f\" % v_measure_score([0, 1, 2, 3], [0, 0, 1, 1]))\n 0.66...\n\nLabelings that have pure clusters with members coming from the same\nclasses are homogeneous but un-necessary splits harms completeness\nand thus penalize V-measure as well::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 1, 2]))\n 0.8...\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 1, 2, 3]))\n 0.66...\n\nIf classes members are completely split across different clusters,\nthe assignment is totally incomplete, hence the V-Measure is null::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 0, 0], [0, 1, 2, 3]))\n 0.0...\n\nClusters that include samples from totally different classes totally\ndestroy the homogeneity of the labeling, hence::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 0, 0]))\n 0.0..." - }, - { - "name": "mutual_info_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A clustering of the data into disjoint subsets." - }, - { - "name": "labels_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A clustering of the data into disjoint subsets." 
- }, - { - "name": "contingency", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A contingency matrix given by the :func:`contingency_matrix` function. If value is ``None``, it will be computed, otherwise the given value is used, with ``labels_true`` and ``labels_pred`` ignored." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mutual Information between two clusterings.\n\nThe Mutual Information is a measure of the similarity between two labels of\nthe same data. Where :math:`|U_i|` is the number of the samples\nin cluster :math:`U_i` and :math:`|V_j|` is the number of the\nsamples in cluster :math:`V_j`, the Mutual Information\nbetween clusterings :math:`U` and :math:`V` is given as:\n\n.. math::\n\n MI(U,V)=\\sum_{i=1}^{|U|} \\sum_{j=1}^{|V|} \\frac{|U_i\\cap V_j|}{N}\n \\log\\frac{N|U_i \\cap V_j|}{|U_i||V_j|}\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. 
This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n A clustering of the data into disjoint subsets.\n\nlabels_pred : int array-like of shape (n_samples,)\n A clustering of the data into disjoint subsets.\n\ncontingency : {ndarray, sparse matrix} of shape (n_classes_true, n_classes_pred), default=None\n A contingency matrix given by the :func:`contingency_matrix` function.\n If value is ``None``, it will be computed, otherwise the given value is\n used, with ``labels_true`` and ``labels_pred`` ignored.\n\nReturns\n-------\nmi : float\n Mutual information, a non-negative value\n\nNotes\n-----\nThe logarithm used is the natural logarithm (base-e).\n\nSee Also\n--------\nadjusted_mutual_info_score : Adjusted against chance Mutual Information.\nnormalized_mutual_info_score : Normalized Mutual Information." - }, - { - "name": "adjusted_mutual_info_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A clustering of the data into disjoint subsets." - }, - { - "name": "labels_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A clustering of the data into disjoint subsets." - }, - { - "name": "average_method", - "type": "str", - "hasDefault": true, - "default": "'arithmetic'", - "limitation": null, - "ignored": false, - "docstring": "How to compute the normalizer in the denominator. Possible options are 'min', 'geometric', 'arithmetic', and 'max'. .. versionadded:: 0.20 .. versionchanged:: 0.22 The default value of ``average_method`` changed from 'max' to 'arithmetic'." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Adjusted Mutual Information between two clusterings.\n\nAdjusted Mutual Information (AMI) is an adjustment of the Mutual\nInformation (MI) score to account for chance. It accounts for the fact that\nthe MI is generally higher for two clusterings with a larger number of\nclusters, regardless of whether there is actually more information shared.\nFor two clusterings :math:`U` and :math:`V`, the AMI is given as::\n\n AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [avg(H(U), H(V)) - E(MI(U, V))]\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\nBe mindful that this function is an order of magnitude slower than other\nmetrics, such as the Adjusted Rand Index.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n A clustering of the data into disjoint subsets.\n\nlabels_pred : int array-like of shape (n_samples,)\n A clustering of the data into disjoint subsets.\n\naverage_method : str, default='arithmetic'\n How to compute the normalizer in the denominator. Possible options\n are 'min', 'geometric', 'arithmetic', and 'max'.\n\n .. versionadded:: 0.20\n\n .. versionchanged:: 0.22\n The default value of ``average_method`` changed from 'max' to\n 'arithmetic'.\n\nReturns\n-------\nami: float (upperlimited by 1.0)\n The AMI returns a value of 1 when the two partitions are identical\n (ie perfectly matched). 
Random partitions (independent labellings) have\n an expected AMI around 0 on average hence can be negative.\n\nSee Also\n--------\nadjusted_rand_score : Adjusted Rand Index.\nmutual_info_score : Mutual Information (not adjusted for chance).\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have\nscore 1.0::\n\n >>> from sklearn.metrics.cluster import adjusted_mutual_info_score\n >>> adjusted_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])\n ... # doctest: +SKIP\n 1.0\n >>> adjusted_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])\n ... # doctest: +SKIP\n 1.0\n\nIf classes members are completely split across different clusters,\nthe assignment is totally in-complete, hence the AMI is null::\n\n >>> adjusted_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])\n ... # doctest: +SKIP\n 0.0\n\nReferences\n----------\n.. [1] `Vinh, Epps, and Bailey, (2010). Information Theoretic Measures for\n Clusterings Comparison: Variants, Properties, Normalization and\n Correction for Chance, JMLR\n `_\n\n.. [2] `Wikipedia entry for the Adjusted Mutual Information\n `_" - }, - { - "name": "normalized_mutual_info_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A clustering of the data into disjoint subsets." - }, - { - "name": "labels_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A clustering of the data into disjoint subsets." - }, - { - "name": "average_method", - "type": "str", - "hasDefault": true, - "default": "'arithmetic'", - "limitation": null, - "ignored": false, - "docstring": "How to compute the normalizer in the denominator. Possible options are 'min', 'geometric', 'arithmetic', and 'max'. .. versionadded:: 0.20 .. versionchanged:: 0.22 The default value of ``average_method`` changed from 'geometric' to 'arithmetic'." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Normalized Mutual Information between two clusterings.\n\nNormalized Mutual Information (NMI) is a normalization of the Mutual\nInformation (MI) score to scale the results between 0 (no mutual\ninformation) and 1 (perfect correlation). In this function, mutual\ninformation is normalized by some generalized mean of ``H(labels_true)``\nand ``H(labels_pred))``, defined by the `average_method`.\n\nThis measure is not adjusted for chance. Therefore\n:func:`adjusted_mutual_info_score` might be preferred.\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n A clustering of the data into disjoint subsets.\n\nlabels_pred : int array-like of shape (n_samples,)\n A clustering of the data into disjoint subsets.\n\naverage_method : str, default='arithmetic'\n How to compute the normalizer in the denominator. Possible options\n are 'min', 'geometric', 'arithmetic', and 'max'.\n\n .. versionadded:: 0.20\n\n .. versionchanged:: 0.22\n The default value of ``average_method`` changed from 'geometric' to\n 'arithmetic'.\n\nReturns\n-------\nnmi : float\n score between 0.0 and 1.0. 
1.0 stands for perfectly complete labeling\n\nSee Also\n--------\nv_measure_score : V-Measure (NMI with arithmetic mean option).\nadjusted_rand_score : Adjusted Rand Index.\nadjusted_mutual_info_score : Adjusted Mutual Information (adjusted\n against chance).\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have\nscore 1.0::\n\n >>> from sklearn.metrics.cluster import normalized_mutual_info_score\n >>> normalized_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])\n ... # doctest: +SKIP\n 1.0\n >>> normalized_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])\n ... # doctest: +SKIP\n 1.0\n\nIf classes members are completely split across different clusters,\nthe assignment is totally in-complete, hence the NMI is null::\n\n >>> normalized_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])\n ... # doctest: +SKIP\n 0.0" - }, - { - "name": "fowlkes_mallows_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A clustering of the data into disjoint subsets." - }, - { - "name": "labels_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A clustering of the data into disjoint subsets." - }, - { - "name": "sparse", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Compute contingency matrix internally with sparse matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Measure the similarity of two clusterings of a set of points.\n\n.. versionadded:: 0.18\n\nThe Fowlkes-Mallows index (FMI) is defined as the geometric mean between of\nthe precision and recall::\n\n FMI = TP / sqrt((TP + FP) * (TP + FN))\n\nWhere ``TP`` is the number of **True Positive** (i.e. 
the number of pair of\npoints that belongs in the same clusters in both ``labels_true`` and\n``labels_pred``), ``FP`` is the number of **False Positive** (i.e. the\nnumber of pair of points that belongs in the same clusters in\n``labels_true`` and not in ``labels_pred``) and ``FN`` is the number of\n**False Negative** (i.e the number of pair of points that belongs in the\nsame clusters in ``labels_pred`` and not in ``labels_True``).\n\nThe score ranges from 0 to 1. A high value indicates a good similarity\nbetween two clusters.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = (``n_samples``,)\n A clustering of the data into disjoint subsets.\n\nlabels_pred : array, shape = (``n_samples``, )\n A clustering of the data into disjoint subsets.\n\nsparse : bool, default=False\n Compute contingency matrix internally with sparse matrix.\n\nReturns\n-------\nscore : float\n The resulting Fowlkes-Mallows score.\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have\nscore 1.0::\n\n >>> from sklearn.metrics.cluster import fowlkes_mallows_score\n >>> fowlkes_mallows_score([0, 0, 1, 1], [0, 0, 1, 1])\n 1.0\n >>> fowlkes_mallows_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nIf classes members are completely split across different clusters,\nthe assignment is totally random, hence the FMI is null::\n\n >>> fowlkes_mallows_score([0, 0, 0, 0], [0, 1, 2, 3])\n 0.0\n\nReferences\n----------\n.. [1] `E. B. Fowkles and C. L. Mallows, 1983. \"A method for comparing two\n hierarchical clusterings\". Journal of the American Statistical\n Association\n `_\n\n.. 
[2] `Wikipedia entry for the Fowlkes-Mallows Index\n `_" - }, - { - "name": "entropy", - "decorators": [], - "parameters": [ - { - "name": "labels", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The labels" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculates the entropy for a labeling.\n\nParameters\n----------\nlabels : int array, shape = [n_samples]\n The labels\n\nNotes\n-----\nThe logarithm used is the natural logarithm (base-e)." - } - ] - }, - { - "name": "sklearn.metrics.cluster._unsupervised", - "imports": [ - "import functools", - "import numpy as np", - "from utils import check_random_state", - "from utils import check_X_y", - "from utils import _safe_indexing", - "from pairwise import pairwise_distances_chunked", - "from pairwise import pairwise_distances", - "from preprocessing import LabelEncoder", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "check_number_of_labels", - "decorators": [], - "parameters": [ - { - "name": "n_labels", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of labels." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that number of labels are valid.\n\nParameters\n----------\nn_labels : int\n Number of labels.\n\nn_samples : int\n Number of samples." - }, - { - "name": "silhouette_score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An array of pairwise distances between samples, or a feature array." 
- }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted labels for each sample." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options allowed by :func:`metrics.pairwise.pairwise_distances `. If ``X`` is the distance array itself, use ``metric=\"precomputed\"``." - }, - { - "name": "sample_size", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The size of the sample to use when computing the Silhouette Coefficient on a random subset of the data. If ``sample_size is None``, no sampling is used." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for selecting a subset of samples. Used when ``sample_size is not None``. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "**kwds", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Any further parameters are passed directly to the distance function. If using a scipy.spatial.distance metric, the parameters are still metric dependent. See the scipy docs for usage examples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the mean Silhouette Coefficient of all samples.\n\nThe Silhouette Coefficient is calculated using the mean intra-cluster\ndistance (``a``) and the mean nearest-cluster distance (``b``) for each\nsample. The Silhouette Coefficient for a sample is ``(b - a) / max(a,\nb)``. 
To clarify, ``b`` is the distance between a sample and the nearest\ncluster that the sample is not a part of.\nNote that Silhouette Coefficient is only defined if number of labels\nis ``2 <= n_labels <= n_samples - 1``.\n\nThis function returns the mean Silhouette Coefficient over all samples.\nTo obtain the values for each sample, use :func:`silhouette_samples`.\n\nThe best value is 1 and the worst value is -1. Values near 0 indicate\noverlapping clusters. Negative values generally indicate that a sample has\nbeen assigned to the wrong cluster, as a different cluster is more similar.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples_a, n_samples_a) if metric == \"precomputed\" or (n_samples_a, n_features) otherwise\n An array of pairwise distances between samples, or a feature array.\n\nlabels : array-like of shape (n_samples,)\n Predicted labels for each sample.\n\nmetric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by :func:`metrics.pairwise.pairwise_distances\n `. If ``X`` is\n the distance array itself, use ``metric=\"precomputed\"``.\n\nsample_size : int, default=None\n The size of the sample to use when computing the Silhouette Coefficient\n on a random subset of the data.\n If ``sample_size is None``, no sampling is used.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for selecting a subset of samples.\n Used when ``sample_size is not None``.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n**kwds : optional keyword parameters\n Any further parameters are passed directly to the distance function.\n If using a scipy.spatial.distance metric, the parameters are still\n metric dependent. 
See the scipy docs for usage examples.\n\nReturns\n-------\nsilhouette : float\n Mean Silhouette Coefficient for all samples.\n\nReferences\n----------\n\n.. [1] `Peter J. Rousseeuw (1987). \"Silhouettes: a Graphical Aid to the\n Interpretation and Validation of Cluster Analysis\". Computational\n and Applied Mathematics 20: 53-65.\n `_\n\n.. [2] `Wikipedia entry on the Silhouette Coefficient\n `_" - }, - { - "name": "_silhouette_reduce", - "decorators": [], - "parameters": [ - { - "name": "D_chunk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed distances for a chunk." - }, - { - "name": "start", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "First index in the chunk." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Corresponding cluster labels, encoded as {0, ..., n_clusters-1}." - }, - { - "name": "label_freqs", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Distribution of cluster labels in ``labels``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Accumulate silhouette statistics for vertical chunk of X.\n\nParameters\n----------\nD_chunk : array-like of shape (n_chunk_samples, n_samples)\n Precomputed distances for a chunk.\nstart : int\n First index in the chunk.\nlabels : array-like of shape (n_samples,)\n Corresponding cluster labels, encoded as {0, ..., n_clusters-1}.\nlabel_freqs : array-like\n Distribution of cluster labels in ``labels``." 
- }, - { - "name": "silhouette_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An array of pairwise distances between samples, or a feature array." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Label values for each sample." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options allowed by :func:`sklearn.metrics.pairwise.pairwise_distances`. If ``X`` is the distance array itself, use \"precomputed\" as the metric. Precomputed distance matrices must have 0 along the diagonal." - }, - { - "name": "`**kwds`", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Any further parameters are passed directly to the distance function. If using a ``scipy.spatial.distance`` metric, the parameters are still metric dependent. See the scipy docs for usage examples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Silhouette Coefficient for each sample.\n\nThe Silhouette Coefficient is a measure of how well samples are clustered\nwith samples that are similar to themselves. Clustering models with a high\nSilhouette Coefficient are said to be dense, where samples in the same\ncluster are similar to each other, and well separated, where samples in\ndifferent clusters are not very similar to each other.\n\nThe Silhouette Coefficient is calculated using the mean intra-cluster\ndistance (``a``) and the mean nearest-cluster distance (``b``) for each\nsample. 
The Silhouette Coefficient for a sample is ``(b - a) / max(a,\nb)``.\nNote that Silhouette Coefficient is only defined if number of labels\nis 2 ``<= n_labels <= n_samples - 1``.\n\nThis function returns the Silhouette Coefficient for each sample.\n\nThe best value is 1 and the worst value is -1. Values near 0 indicate\noverlapping clusters.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples_a, n_samples_a) if metric == \"precomputed\" or (n_samples_a, n_features) otherwise\n An array of pairwise distances between samples, or a feature array.\n\nlabels : array-like of shape (n_samples,)\n Label values for each sample.\n\nmetric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by :func:`sklearn.metrics.pairwise.pairwise_distances`.\n If ``X`` is the distance array itself, use \"precomputed\" as the metric.\n Precomputed distance matrices must have 0 along the diagonal.\n\n`**kwds` : optional keyword parameters\n Any further parameters are passed directly to the distance function.\n If using a ``scipy.spatial.distance`` metric, the parameters are still\n metric dependent. See the scipy docs for usage examples.\n\nReturns\n-------\nsilhouette : array-like of shape (n_samples,)\n Silhouette Coefficients for each sample.\n\nReferences\n----------\n\n.. [1] `Peter J. Rousseeuw (1987). \"Silhouettes: a Graphical Aid to the\n Interpretation and Validation of Cluster Analysis\". Computational\n and Applied Mathematics 20: 53-65.\n `_\n\n.. [2] `Wikipedia entry on the Silhouette Coefficient\n `_" - }, - { - "name": "calinski_harabasz_score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A list of ``n_features``-dimensional data points. 
Each row corresponds to a single data point." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted labels for each sample." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Calinski and Harabasz score.\n\nIt is also known as the Variance Ratio Criterion.\n\nThe score is defined as ratio between the within-cluster dispersion and\nthe between-cluster dispersion.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n A list of ``n_features``-dimensional data points. Each row corresponds\n to a single data point.\n\nlabels : array-like of shape (n_samples,)\n Predicted labels for each sample.\n\nReturns\n-------\nscore : float\n The resulting Calinski-Harabasz score.\n\nReferences\n----------\n.. [1] `T. Calinski and J. Harabasz, 1974. \"A dendrite method for cluster\n analysis\". Communications in Statistics\n `_" - }, - { - "name": "davies_bouldin_score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A list of ``n_features``-dimensional data points. Each row corresponds to a single data point." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted labels for each sample." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Computes the Davies-Bouldin score.\n\nThe score is defined as the average similarity measure of each cluster with\nits most similar cluster, where similarity is the ratio of within-cluster\ndistances to between-cluster distances. 
Thus, clusters which are farther\napart and less dispersed will result in a better score.\n\nThe minimum score is zero, with lower values indicating better clustering.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n A list of ``n_features``-dimensional data points. Each row corresponds\n to a single data point.\n\nlabels : array-like of shape (n_samples,)\n Predicted labels for each sample.\n\nReturns\n-------\nscore: float\n The resulting Davies-Bouldin score.\n\nReferences\n----------\n.. [1] Davies, David L.; Bouldin, Donald W. (1979).\n `\"A Cluster Separation Measure\"\n `__.\n IEEE Transactions on Pattern Analysis and Machine Intelligence.\n PAMI-1 (2): 224-227" - } - ] - }, - { - "name": "sklearn.metrics.cluster", - "imports": [ - "from _supervised import adjusted_mutual_info_score", - "from _supervised import normalized_mutual_info_score", - "from _supervised import adjusted_rand_score", - "from _supervised import rand_score", - "from _supervised import completeness_score", - "from _supervised import contingency_matrix", - "from _supervised import pair_confusion_matrix", - "from _supervised import expected_mutual_information", - "from _supervised import homogeneity_completeness_v_measure", - "from _supervised import homogeneity_score", - "from _supervised import mutual_info_score", - "from _supervised import v_measure_score", - "from _supervised import fowlkes_mallows_score", - "from _supervised import entropy", - "from _unsupervised import silhouette_samples", - "from _unsupervised import silhouette_score", - "from _unsupervised import calinski_harabasz_score", - "from _unsupervised import davies_bouldin_score", - "from _bicluster import consensus_score" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.metrics.cluster.tests.test_bicluster", - "imports": [ - "import numpy as np", - "from sklearn.utils._testing import assert_almost_equal", - 
"from sklearn.metrics.cluster._bicluster import _jaccard", - "from sklearn.metrics import consensus_score" - ], - "classes": [], - "functions": [ - { - "name": "test_jaccard", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_consensus_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_consensus_score_issue2445", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Different number of biclusters in A and B" - } - ] - }, - { - "name": "sklearn.metrics.cluster.tests.test_common", - "imports": [ - "from functools import partial", - "import pytest", - "import numpy as np", - "from sklearn.metrics.cluster import adjusted_mutual_info_score", - "from sklearn.metrics.cluster import adjusted_rand_score", - "from sklearn.metrics.cluster import rand_score", - "from sklearn.metrics.cluster import completeness_score", - "from sklearn.metrics.cluster import fowlkes_mallows_score", - "from sklearn.metrics.cluster import homogeneity_score", - "from sklearn.metrics.cluster import mutual_info_score", - "from sklearn.metrics.cluster import normalized_mutual_info_score", - "from sklearn.metrics.cluster import v_measure_score", - "from sklearn.metrics.cluster import silhouette_score", - "from sklearn.metrics.cluster import calinski_harabasz_score", - "from sklearn.metrics.cluster import davies_bouldin_score", - "from sklearn.utils._testing import assert_allclose" - ], - "classes": [], - "functions": [ - { - "name": "test_symmetric_non_symmetric_union", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_symmetry", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_symmetry", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normalized_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permute_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_format_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_single_sample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inf_nan_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.cluster.tests.test_supervised", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.metrics.cluster import adjusted_mutual_info_score", - "from sklearn.metrics.cluster import adjusted_rand_score", - "from sklearn.metrics.cluster import rand_score", - "from sklearn.metrics.cluster import completeness_score", - "from sklearn.metrics.cluster import contingency_matrix", - "from sklearn.metrics.cluster import pair_confusion_matrix", - "from sklearn.metrics.cluster import entropy", - "from sklearn.metrics.cluster import expected_mutual_information", - "from sklearn.metrics.cluster import fowlkes_mallows_score", - "from sklearn.metrics.cluster import homogeneity_completeness_v_measure", - "from sklearn.metrics.cluster import homogeneity_score", - "from sklearn.metrics.cluster import mutual_info_score", - "from sklearn.metrics.cluster import normalized_mutual_info_score", - "from sklearn.metrics.cluster import v_measure_score", - "from sklearn.metrics.cluster._supervised import _generalized_average", - "from sklearn.metrics.cluster._supervised import check_clusterings", - "from 
sklearn.utils import assert_all_finite", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import ignore_warnings", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_allclose" - ], - "classes": [], - "functions": [ - { - "name": "test_error_messages_on_wrong_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_generalized_average", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_perfect_matches", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_homogeneous_but_not_complete_labeling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_complete_but_not_homogeneous_labeling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_not_complete_and_not_homogeneous_labeling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_beta_parameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_consecutive_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "uniform_labelings_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_adjustment_for_chance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_adjusted_mutual_info_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_expected_mutual_info_overflow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_int_overflow_mutual_info_fowlkes_mallows_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_entropy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_contingency_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_contingency_matrix_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_exactly_zero_info_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_v_measure_and_mutual_information", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fowlkes_mallows_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fowlkes_mallows_score_properties", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mutual_info_score_positive_constant_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_clustering_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_pair_confusion_matrix_fully_dispersed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pair_confusion_matrix_single_cluster", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pair_confusion_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rand_score_edge_cases", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rand_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.cluster.tests.test_unsupervised", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "import pytest", - "from scipy.sparse import csr_matrix", - "from sklearn import datasets", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.metrics.cluster import silhouette_score", - "from sklearn.metrics.cluster import silhouette_samples", - "from sklearn.metrics import pairwise_distances", - "from sklearn.metrics.cluster import calinski_harabasz_score", - "from sklearn.metrics.cluster import davies_bouldin_score" - ], - "classes": [], - "functions": [ - { - "name": "test_silhouette", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cluster_size_1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_silhouette_paper_example", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_correct_labelsize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_non_encoded_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_numpy_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_silhouette_nonzero_diag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_raises_on_only_one_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Assert message when there is only one label" - }, - { - "name": "assert_raises_on_all_points_same_cluster", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Assert message when all point are in different clusters" - }, - { - "name": "test_calinski_harabasz_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_davies_bouldin_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.cluster.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.metrics.tests.test_classification", - "imports": [ - "from functools import partial", - "from itertools import product", - "from itertools import chain", - "from itertools import permutations", - "import warnings", - "import numpy as np", - "from scipy import linalg", - "import pytest", - "from sklearn import datasets", - "from sklearn import svm", - "from sklearn.datasets import make_multilabel_classification", - "from sklearn.preprocessing import label_binarize", - "from sklearn.preprocessing import LabelBinarizer", - "from sklearn.utils.validation import check_random_state", - "from 
sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_warns_div0", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._mocking import MockDataFrame", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import average_precision_score", - "from sklearn.metrics import balanced_accuracy_score", - "from sklearn.metrics import classification_report", - "from sklearn.metrics import cohen_kappa_score", - "from sklearn.metrics import confusion_matrix", - "from sklearn.metrics import f1_score", - "from sklearn.metrics import fbeta_score", - "from sklearn.metrics import hamming_loss", - "from sklearn.metrics import hinge_loss", - "from sklearn.metrics import jaccard_score", - "from sklearn.metrics import log_loss", - "from sklearn.metrics import matthews_corrcoef", - "from sklearn.metrics import precision_recall_fscore_support", - "from sklearn.metrics import precision_score", - "from sklearn.metrics import recall_score", - "from sklearn.metrics import zero_one_loss", - "from sklearn.metrics import brier_score_loss", - "from sklearn.metrics import multilabel_confusion_matrix", - "from sklearn.metrics._classification import _check_targets", - "from sklearn.exceptions import UndefinedMetricWarning", - "from scipy.spatial.distance import hamming as sp_hamming", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from pandas import Series", - "from pandas import DataFrame" - ], - "classes": [], - "functions": [ - { - "name": "make_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make some 
classification predictions on a toy dataset using a SVC\n\nIf binary is True restrict to a binary classification problem instead of a\nmulticlass classification problem" - }, - { - "name": "test_classification_report_dictionary_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_output_dict_empty_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_zero_division_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_accuracy_score_subset_accuracy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_score_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f_binary_single_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f_extra_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f_ignored_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_average_precision_score_score_non_binary_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_average_precision_score_duplicate_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_average_precision_score_tied_values", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_fscore_support_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f_unused_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_confusion_matrix_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_confusion_matrix_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_confusion_matrix_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_confusion_matrix_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_normalize_wrong_option", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_normalize_single_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cohen_kappa", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_matthews_corrcoef_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_matthews_corrcoef_against_numpy_corrcoef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_matthews_corrcoef_against_jurman", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_matthews_corrcoef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_matthews_corrcoef_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_matthews_corrcoef_overflow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_score_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_refcall_f1_score_multilabel_unordered_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_score_binary_averaged", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_precision_recall", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_multiclass_subset_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_confusion_matrix_on_zero_length_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_multiclass_balanced", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_multiclass_with_label_detection", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_multiclass_with_digits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_multiclass_with_string_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_multiclass_with_unicode_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_multiclass_with_long_string_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_labels_target_names_unequal_length", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_no_labels_target_names_unequal_length", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_multilabel_classification_report", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_zero_one_loss_subset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_hamming_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_jaccard_score_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_jaccard_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_jaccard_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_average_binary_jaccard_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_jaccard_score_zero_division_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_jaccard_score_zero_division_set_value", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_score_multilabel_1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_score_multilabel_2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_score_with_an_empty_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_no_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_no_labels_check_warnings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_no_labels_average_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_no_labels_average_none_warn", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prf_warnings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prf_no_warnings_if_zero_division_set", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_recall_warnings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_warnings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fscore_warnings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prf_average_binary_data_non_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test__check_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test__check_targets_multiclass_with_both_y_true_and_y_pred_binary", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hinge_loss_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hinge_loss_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hinge_loss_multiclass_missing_labels_with_labels_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hinge_loss_multiclass_with_missing_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hinge_loss_multiclass_missing_labels_only_two_unq_in_y_true", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hinge_loss_multiclass_invariance_lists", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_log_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_log_loss_pandas_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_brier_score_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_balanced_accuracy_score_unseen", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_balanced_accuracy_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.tests.test_common", - "imports": [ - "from functools 
import partial", - "from inspect import signature", - "from itertools import product", - "from itertools import chain", - "from itertools import permutations", - "import numpy as np", - "import scipy.sparse as sp", - "import pytest", - "from sklearn.datasets import make_multilabel_classification", - "from sklearn.preprocessing import LabelBinarizer", - "from sklearn.utils.multiclass import type_of_target", - "from sklearn.utils.validation import _num_samples", - "from sklearn.utils.validation import check_random_state", - "from sklearn.utils import shuffle", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_less", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import average_precision_score", - "from sklearn.metrics import balanced_accuracy_score", - "from sklearn.metrics import brier_score_loss", - "from sklearn.metrics import cohen_kappa_score", - "from sklearn.metrics import confusion_matrix", - "from sklearn.metrics import coverage_error", - "from sklearn.metrics import det_curve", - "from sklearn.metrics import explained_variance_score", - "from sklearn.metrics import f1_score", - "from sklearn.metrics import fbeta_score", - "from sklearn.metrics import hamming_loss", - "from sklearn.metrics import hinge_loss", - "from sklearn.metrics import jaccard_score", - "from sklearn.metrics import label_ranking_average_precision_score", - "from sklearn.metrics import label_ranking_loss", - "from sklearn.metrics import log_loss", - "from sklearn.metrics import max_error", - "from sklearn.metrics import matthews_corrcoef", - "from sklearn.metrics import mean_absolute_error", - "from sklearn.metrics import mean_absolute_percentage_error", - "from sklearn.metrics import mean_squared_error", - "from sklearn.metrics import 
mean_tweedie_deviance", - "from sklearn.metrics import mean_poisson_deviance", - "from sklearn.metrics import mean_gamma_deviance", - "from sklearn.metrics import median_absolute_error", - "from sklearn.metrics import multilabel_confusion_matrix", - "from sklearn.metrics import precision_recall_curve", - "from sklearn.metrics import precision_score", - "from sklearn.metrics import r2_score", - "from sklearn.metrics import recall_score", - "from sklearn.metrics import roc_auc_score", - "from sklearn.metrics import roc_curve", - "from sklearn.metrics import zero_one_loss", - "from sklearn.metrics import ndcg_score", - "from sklearn.metrics import dcg_score", - "from sklearn.metrics import top_k_accuracy_score", - "from sklearn.metrics._base import _average_binary_score" - ], - "classes": [], - "functions": [ - { - "name": "precision_recall_curve_padded_thresholds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The dimensions of precision-recall pairs and the threshold array as\nreturned by the precision_recall_curve do not match. See\nfunc:`sklearn.metrics.precision_recall_curve`\n\nThis prevents implicit conversion of return value triple to an higher\ndimensional np.array of dtype('float64') (it will be of dtype('object)\ninstead). This again is needed for assert_array_equal to work correctly.\n\nAs a workaround we pad the threshold array with NaN values to match\nthe dimension of precision and recall arrays respectively." 
- }, - { - "name": "_require_positive_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make targets strictly positive" - }, - { - "name": "test_symmetry_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_symmetric_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_not_symmetric_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_order_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_order_invariance_multilabel_and_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_format_invariance_with_1d_vectors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_invariance_string_vs_numbers_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_thresholded_invariance_string_vs_numbers_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_thresholded_inf_nan_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_inf_nan_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "check that classification metrics raise a message mentioning the\noccurrence of non-finite values in the target vectors." 
- }, - { - "name": "test_classification_binary_continuous_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "check that classification metrics raise a message of mixed type data\nwith continuous/binary target vectors." - }, - { - "name": "check_single_sample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_single_sample_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_single_sample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_single_sample_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multioutput_number_of_output_differ", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multioutput_regression_invariance_to_dimension_shuffling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_representation_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raise_value_error_multilabel_sequences", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normalize_option_binary_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normalize_option_multiclass_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_normalize_option_multilabel_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_averaging", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_averaging", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_averaging_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_averaging_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_averaging_multilabel_all_zeroes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_averaging_binary_multilabel_all_zeroes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_averaging_multilabel_all_ones", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sample_weight_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_sample_weight_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_binary_sample_weight_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_sample_weight_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_multilabel_sample_weight_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_averaging_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_label_permutations_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_thresholded_multilabel_multioutput_permutations_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_thresholded_metric_permutation_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_metrics_consistent_type_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_metrics_pos_label_error_str", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.tests.test_pairwise", - "imports": [ - "from types import GeneratorType", - "import numpy as np", - "from numpy import linalg", - "from scipy.sparse import dok_matrix", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import issparse", - "from scipy.spatial.distance import cosine", - "from scipy.spatial.distance import cityblock", - "from scipy.spatial.distance import minkowski", - "from scipy.spatial.distance import cdist", - "from scipy.spatial.distance import pdist", - "from scipy.spatial.distance import squareform", - "from scipy.spatial.distance import wminkowski", - "from scipy.spatial.distance import minkowski as wminkowski", - "from sklearn.utils.fixes import sp_version", - "from sklearn.utils.fixes import parse_version", - "import 
pytest", - "from sklearn import config_context", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.metrics.pairwise import euclidean_distances", - "from sklearn.metrics.pairwise import nan_euclidean_distances", - "from sklearn.metrics.pairwise import manhattan_distances", - "from sklearn.metrics.pairwise import haversine_distances", - "from sklearn.metrics.pairwise import linear_kernel", - "from sklearn.metrics.pairwise import chi2_kernel", - "from sklearn.metrics.pairwise import additive_chi2_kernel", - "from sklearn.metrics.pairwise import polynomial_kernel", - "from sklearn.metrics.pairwise import rbf_kernel", - "from sklearn.metrics.pairwise import laplacian_kernel", - "from sklearn.metrics.pairwise import sigmoid_kernel", - "from sklearn.metrics.pairwise import cosine_similarity", - "from sklearn.metrics.pairwise import cosine_distances", - "from sklearn.metrics.pairwise import pairwise_distances", - "from sklearn.metrics.pairwise import pairwise_distances_chunked", - "from sklearn.metrics.pairwise import pairwise_distances_argmin_min", - "from sklearn.metrics.pairwise import pairwise_distances_argmin", - "from sklearn.metrics.pairwise import pairwise_kernels", - "from sklearn.metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS", - "from sklearn.metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS", - "from sklearn.metrics.pairwise import PAIRWISE_BOOLEAN_FUNCTIONS", - "from sklearn.metrics.pairwise import PAIRED_DISTANCES", - "from sklearn.metrics.pairwise import check_pairwise_arrays", - "from sklearn.metrics.pairwise import check_paired_arrays", - "from sklearn.metrics.pairwise import paired_distances", - "from sklearn.metrics.pairwise import paired_euclidean_distances", - "from 
sklearn.metrics.pairwise import paired_manhattan_distances", - "from sklearn.metrics.pairwise import _euclidean_distances_upcast", - "from sklearn.preprocessing import normalize", - "from sklearn.exceptions import DataConversionWarning" - ], - "classes": [], - "functions": [ - { - "name": "test_pairwise_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_boolean_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_data_conversion_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_precomputed_non_negative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "callable_rbf_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_callable_nonstrict_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_kernels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_kernels_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_kernels_filter_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_paired_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_paired_distances_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_distances_argmin_min", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_reduce_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_distances_chunked_reduce", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_distances_chunked_reduce_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_distances_chunked_reduce_valid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_distances_chunked_reduce_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pairwise_distances_chunked", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_distances_chunked_diagonal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parallel_pairwise_distances_diagonal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_distances_chunked", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_euclidean_distances_known_result", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_euclidean_distances_with_norms", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_euclidean_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_euclidean_distances_sym", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_euclidean_distances_upcast", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_euclidean_distances_upcast_sym", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_euclidean_distances_extreme_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nan_euclidean_distances_equal_to_euclidean_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nan_euclidean_distances_infinite_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nan_euclidean_distances_2x2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nan_euclidean_distances_complete_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nan_euclidean_distances_not_trival", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nan_euclidean_distances_one_feature_match_positive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cosine_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_haversine_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_paired_euclidean_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_paired_manhattan_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_chi_square_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_symmetry", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rbf_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_laplacian_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_similarity_sparse_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_cosine_similarity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_dense_matrices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_XB_returned", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_different_dimensions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_invalid_dimensions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_sparse_arrays", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "tuplify", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_tuple_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_preserve_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_distances_data_derived_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_numeric_pairwise_distances_datatypes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.tests.test_ranking", - "imports": [ - "import re", - "import pytest", - "import numpy as np", - "import warnings", - "from scipy.sparse import csr_matrix", - "from sklearn import datasets", - "from sklearn import svm", - "from 
sklearn.utils.extmath import softmax", - "from sklearn.datasets import make_multilabel_classification", - "from sklearn.random_projection import _sparse_random_matrix", - "from sklearn.utils.validation import check_array", - "from sklearn.utils.validation import check_consistent_length", - "from sklearn.utils.validation import check_random_state", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import auc", - "from sklearn.metrics import average_precision_score", - "from sklearn.metrics import coverage_error", - "from sklearn.metrics import det_curve", - "from sklearn.metrics import label_ranking_average_precision_score", - "from sklearn.metrics import precision_recall_curve", - "from sklearn.metrics import label_ranking_loss", - "from sklearn.metrics import roc_auc_score", - "from sklearn.metrics import roc_curve", - "from sklearn.metrics._ranking import _ndcg_sample_scores", - "from sklearn.metrics._ranking import _dcg_sample_scores", - "from sklearn.metrics import ndcg_score", - "from sklearn.metrics import dcg_score", - "from sklearn.metrics import top_k_accuracy_score", - "from sklearn.exceptions import UndefinedMetricWarning", - "from sklearn.model_selection import train_test_split", - "from sklearn.linear_model import LogisticRegression" - ], - "classes": [], - "functions": [ - { - "name": "make_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make some classification predictions on a toy dataset using a SVC\n\nIf binary is True restrict to a binary classification problem instead of a\nmulticlass classification problem" - }, - { - "name": "_auc", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Alternative implementation to check for correctness of\n`roc_auc_score`." - }, - { - "name": "_average_precision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Alternative implementation to check for correctness of\n`average_precision_score`.\n\nNote that this implementation fails on some edge cases.\nFor example, for constant predictions e.g. [0.5, 0.5, 0.5],\ny_true = [1, 0, 0] returns an average precision of 0.33...\nbut y_true = [0, 0, 1] returns 1.0." - }, - { - "name": "_average_precision_slow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "A second alternative implementation of average precision that closely\nfollows the Wikipedia article's definition (see References). This should\ngive identical results as `average_precision_score` for all inputs.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Average precision\n `_" - }, - { - "name": "_partial_roc_auc_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Alternative implementation to check for correctness of `roc_auc_score`\nwith `max_fpr` set." 
- }, - { - "name": "test_roc_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_end_points", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_returns_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_multi", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_confidence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_hard", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_one_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_toydata", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_drop_intermediate", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_fpr_tpr_increasing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_auc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_auc_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_ovo_roc_auc_toydata", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_multiclass_ovo_roc_auc_toydata_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_ovr_roc_auc_toydata", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_auc_score_multiclass_labels_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_auc_score_multiclass_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_auc_score_non_binary_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_binary_clf_curve_multiclass_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_binary_clf_curve_implicit_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_precision_recall_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_curve_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_curve_toydata", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_average_precision_constant_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "test_average_precision_score_pos_label_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_scale_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_det_curve_toydata", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_det_curve_tie_handling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_det_curve_sanity_check", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_det_curve_constant_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_det_curve_perfect_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_det_curve_bad_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_det_curve_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_lrap_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_zero_or_all_relevant_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_lrap_error_raised", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_lrap_only_ties", - "decorators": [], 
- "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_lrap_without_tie_and_increasing_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_my_lrap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Simple implementation of label ranking average precision" - }, - { - "name": "check_alternative_lrap_implementation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_ranking_avp", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lrap_error_raised", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_alternative_lrap_implementation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lrap_sample_weighting_zero_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_coverage_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_coverage_tie_handling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_ranking_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ranking_appropriate_input_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ranking_loss_ties_handling", - "decorators": [], - "parameters": [], 
- "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dcg_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_dcg_score_for", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dcg_ties", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ndcg_ignore_ties_with_k", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ndcg_invariant", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ndcg_toy_examples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ndcg_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_ndcg_score_for", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_roc_auc_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_top_k_accuracy_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_top_k_accuracy_score_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_top_k_accuracy_score_increasing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_top_k_accuracy_score_ties", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_top_k_accuracy_score_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_top_k_accuracy_score_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.tests.test_regression", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_allclose", - "from itertools import product", - "import pytest", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.metrics import explained_variance_score", - "from sklearn.metrics import mean_absolute_error", - "from sklearn.metrics import mean_squared_error", - "from sklearn.metrics import mean_squared_log_error", - "from sklearn.metrics import median_absolute_error", - "from sklearn.metrics import mean_absolute_percentage_error", - "from sklearn.metrics import max_error", - "from sklearn.metrics import r2_score", - "from sklearn.metrics import mean_tweedie_deviance", - "from sklearn.metrics._regression import _check_reg_targets", - "from exceptions import UndefinedMetricWarning" - ], - "classes": [], - "functions": [ - { - "name": "test_regression_metrics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_squared_error_multioutput_raw_value_squared", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multioutput_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_metrics_at_limits", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test__check_reg_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test__check_reg_targets_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_multioutput_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_custom_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_single_sample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tweedie_deviance_continuity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_absolute_percentage_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.tests.test_score_objects", - "imports": [ - "from copy import deepcopy", - "import pickle", - "import tempfile", - "import shutil", - "import os", - "import numbers", - "from unittest.mock import Mock", - "from functools import partial", - "import numpy as np", - "import pytest", - "import joblib", - "from numpy.testing import assert_allclose", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.base import BaseEstimator", - "from sklearn.metrics import average_precision_score", - "from sklearn.metrics import brier_score_loss", - "from sklearn.metrics import 
f1_score", - "from sklearn.metrics import fbeta_score", - "from sklearn.metrics import jaccard_score", - "from sklearn.metrics import log_loss", - "from sklearn.metrics import precision_score", - "from sklearn.metrics import r2_score", - "from sklearn.metrics import recall_score", - "from sklearn.metrics import roc_auc_score", - "from sklearn.metrics import cluster as cluster_module", - "from sklearn.metrics import check_scoring", - "from sklearn.metrics._scorer import _PredictScorer", - "from sklearn.metrics._scorer import _passthrough_scorer", - "from sklearn.metrics._scorer import _MultimetricScorer", - "from sklearn.metrics._scorer import _check_multimetric_scoring", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import make_scorer", - "from sklearn.metrics import get_scorer", - "from sklearn.metrics import SCORERS", - "from sklearn.neighbors import KNeighborsClassifier", - "from sklearn.svm import LinearSVC", - "from sklearn.pipeline import make_pipeline", - "from sklearn.cluster import KMeans", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import Perceptron", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.datasets import make_blobs", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.datasets import make_multilabel_classification", - "from sklearn.datasets import load_diabetes", - "from sklearn.model_selection import train_test_split", - "from sklearn.model_selection import cross_val_score", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.multiclass import OneVsRestClassifier", - "from sklearn.datasets import load_breast_cancer", - "from sklearn.utils import shuffle" - ], - "classes": [ - { - "name": "EstimatorWithoutFit", - "decorators": [], - "superclasses": [], - "methods": [ - { - 
"name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Dummy estimator to test scoring validators" - }, - { - "name": "EstimatorWithFit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy estimator to test scoring validators" - }, - { - "name": "EstimatorWithFitAndScore", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy estimator to test scoring validators" - }, - { - "name": "EstimatorWithFitAndPredict", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy estimator to test scoring validators" - }, - { - "name": "DummyScorer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": "" - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy scorer that always returns 1." - } - ], - "functions": [ - { - "name": "_require_positive_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make targets strictly positive" - }, - { - "name": "_make_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "setup_module", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "teardown_module", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_all_scorers_repr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_scoring_validator_for_single_metric_usecases", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_scoring_and_check_multimetric_scoring", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_scoring_and_check_multimetric_scoring_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_scoring_gridsearchcv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_scorers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_thresholded_scorers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_thresholded_scorers_multilabel_indicator_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_supervised_cluster_scorers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises_on_score_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_scorer_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_scorer_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scorer_memmap_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scoring_is_not_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multimetric_scorer_calls_method_once", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multimetric_scorer_calls_method_once_classifier_no_decision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multimetric_scorer_calls_method_once_regressor_threshold", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multimetric_scorer_sanity_check", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_roc_proba_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_roc_proba_scorer_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_roc_no_proba_scorer_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "string_labeled_classification_problem", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Train a classifier on binary problem with string target.\n\nThe classifier is trained on a binary classification problem where the\nminority class of interest has a string label that is intentionally not the\ngreatest class label using the lexicographic order. 
In this case, \"cancer\"\nis the positive label, and `classifier.classes_` is\n`[\"cancer\", \"not cancer\"]`.\n\nIn addition, the dataset is imbalanced to better identify problems when\nusing non-symmetric performance metrics such as f1-score, average precision\nand so on.\n\nReturns\n-------\nclassifier : estimator object\n Trained classifier on the binary problem.\nX_test : ndarray of shape (n_samples, n_features)\n Data to be used as testing set in tests.\ny_test : ndarray of shape (n_samples,), dtype=object\n Binary target where labels are strings.\ny_pred : ndarray of shape (n_samples,), dtype=object\n Prediction of `classifier` when predicting for `X_test`.\ny_pred_proba : ndarray of shape (n_samples, 2), dtype=np.float64\n Probabilities of `classifier` when predicting for `X_test`.\ny_pred_decision : ndarray of shape (n_samples,), dtype=np.float64\n Decision function values of `classifier` when predicting on `X_test`." - }, - { - "name": "test_average_precision_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_brier_score_loss_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_symmetric_metric_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scorer_select_proba_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scorer_no_op_multiclass_select_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.metrics._plot.base", - "imports": [ - "import numpy as np", - "from sklearn.base import 
is_classifier" - ], - "classes": [], - "functions": [ - { - "name": "_check_classifier_response_method", - "decorators": [], - "parameters": [ - { - "name": "estimator: object", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Classifier to check" - }, - { - "name": "response_method: {'auto'", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." - }, - { - "name": "'predict_proba'", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." - }, - { - "name": "'decision_function'}", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Return prediction method from the response_method\n\nParameters\n----------\nestimator: object\n Classifier to check\n\nresponse_method: {'auto', 'predict_proba', 'decision_function'}\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. 
If set to 'auto',\n :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\nReturns\n-------\nprediction_method: callable\n prediction method of estimator" - }, - { - "name": "_get_response", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input values." - }, - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline` in which the last estimator is a classifier." - }, - { - "name": "response_method: {'auto'", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." - }, - { - "name": "'predict_proba'", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." - }, - { - "name": "'decision_function'}", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." 
- }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class considered as the positive class when computing the metrics. By default, `estimators.classes_[1]` is considered as the positive class." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return response and positive label.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\nestimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\nresponse_method: {'auto', 'predict_proba', 'decision_function'}\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. If set to 'auto',\n :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\npos_label : str or int, default=None\n The class considered as the positive class when computing\n the metrics. By default, `estimators.classes_[1]` is\n considered as the positive class.\n\nReturns\n-------\ny_pred: ndarray of shape (n_samples,)\n Target scores calculated from the provided response_method\n and pos_label.\n\npos_label: str or int\n The class considered as the positive class when computing\n the metrics." 
- } - ] - }, - { - "name": "sklearn.metrics._plot.confusion_matrix", - "imports": [ - "from itertools import product", - "import numpy as np", - "from None import confusion_matrix", - "from utils import check_matplotlib_support", - "from utils.multiclass import unique_labels", - "from utils.validation import _deprecate_positional_args", - "from base import is_classifier", - "import matplotlib.pyplot as plt" - ], - "classes": [ - { - "name": "ConfusionMatrixDisplay", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "confusion_matrix", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Confusion matrix." - }, - { - "name": "display_labels", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Display labels for plot. If None, display labels are set from 0 to `n_classes - 1`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "plot", - "decorators": [], - "parameters": [ - { - "name": "include_values", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Includes values in confusion matrix." - }, - { - "name": "cmap", - "type": "str", - "hasDefault": true, - "default": "'viridis'", - "limitation": null, - "ignored": false, - "docstring": "Colormap recognized by matplotlib." - }, - { - "name": "xticks_rotation", - "type": "Literal['vertical', 'horizontal']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Rotation of xtick labels." - }, - { - "name": "values_format", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Format specification for values in confusion matrix. 
If `None`, the format specification is 'd' or '.2g' whichever is shorter." - }, - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes object to plot on. If `None`, a new figure and axes is created." - }, - { - "name": "colorbar", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to add a colorbar to the plot." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot visualization.\n\nParameters\n----------\ninclude_values : bool, default=True\n Includes values in confusion matrix.\n\ncmap : str or matplotlib Colormap, default='viridis'\n Colormap recognized by matplotlib.\n\nxticks_rotation : {'vertical', 'horizontal'} or float, default='horizontal'\n Rotation of xtick labels.\n\nvalues_format : str, default=None\n Format specification for values in confusion matrix. If `None`,\n the format specification is 'd' or '.2g' whichever is shorter.\n\nax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\ncolorbar : bool, default=True\n Whether or not to add a colorbar to the plot.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.ConfusionMatrixDisplay`" - } - ], - "docstring": "Confusion Matrix visualization.\n\nIt is recommend to use :func:`~sklearn.metrics.plot_confusion_matrix` to\ncreate a :class:`ConfusionMatrixDisplay`. All parameters are stored as\nattributes.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nconfusion_matrix : ndarray of shape (n_classes, n_classes)\n Confusion matrix.\n\ndisplay_labels : ndarray of shape (n_classes,), default=None\n Display labels for plot. 
If None, display labels are set from 0 to\n `n_classes - 1`.\n\nAttributes\n----------\nim_ : matplotlib AxesImage\n Image representing the confusion matrix.\n\ntext_ : ndarray of shape (n_classes, n_classes), dtype=matplotlib Text, or None\n Array of matplotlib axes. `None` if `include_values` is false.\n\nax_ : matplotlib Axes\n Axes with confusion matrix.\n\nfigure_ : matplotlib Figure\n Figure containing the confusion matrix.\n\nSee Also\n--------\nconfusion_matrix : Compute Confusion Matrix to evaluate the accuracy of a\n classification.\nplot_confusion_matrix : Plot Confusion Matrix.\n\nExamples\n--------\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n... random_state=0)\n>>> clf = SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> predictions = clf.predict(X_test)\n>>> cm = confusion_matrix(y_test, predictions, labels=clf.classes_)\n>>> disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n... display_labels=clf.classes_)\n>>> disp.plot() # doctest: +SKIP" - } - ], - "functions": [ - { - "name": "plot_confusion_matrix", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline` in which the last estimator is a classifier." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input values." - }, - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." 
- }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of labels to index the matrix. This may be used to reorder or select a subset of labels. If `None` is given, those that appear at least once in `y_true` or `y_pred` are used in sorted order." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "normalize", - "type": "Literal['true', 'pred', 'all']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Normalizes confusion matrix over the true (rows), predicted (columns) conditions or all the population. If None, confusion matrix will not be normalized." - }, - { - "name": "display_labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target names used for plotting. By default, `labels` will be used if it is defined, otherwise the unique labels of `y_true` and `y_pred` will be used." - }, - { - "name": "include_values", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Includes values in confusion matrix." - }, - { - "name": "xticks_rotation", - "type": "Literal['vertical', 'horizontal']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Rotation of xtick labels." - }, - { - "name": "values_format", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Format specification for values in confusion matrix. If `None`, the format specification is 'd' or '.2g' whichever is shorter." 
- }, - { - "name": "cmap", - "type": "str", - "hasDefault": true, - "default": "'viridis'", - "limitation": null, - "ignored": false, - "docstring": "Colormap recognized by matplotlib." - }, - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes object to plot on. If `None`, a new figure and axes is created." - }, - { - "name": "colorbar", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to add a colorbar to the plot. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot Confusion Matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\ny_true : array-like of shape (n_samples,)\n Target values.\n\nlabels : array-like of shape (n_classes,), default=None\n List of labels to index the matrix. This may be used to reorder or\n select a subset of labels. If `None` is given, those that appear at\n least once in `y_true` or `y_pred` are used in sorted order.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nnormalize : {'true', 'pred', 'all'}, default=None\n Normalizes confusion matrix over the true (rows), predicted (columns)\n conditions or all the population. If None, confusion matrix will not be\n normalized.\n\ndisplay_labels : array-like of shape (n_classes,), default=None\n Target names used for plotting. 
By default, `labels` will be used if\n it is defined, otherwise the unique labels of `y_true` and `y_pred`\n will be used.\n\ninclude_values : bool, default=True\n Includes values in confusion matrix.\n\nxticks_rotation : {'vertical', 'horizontal'} or float, default='horizontal'\n Rotation of xtick labels.\n\nvalues_format : str, default=None\n Format specification for values in confusion matrix. If `None`,\n the format specification is 'd' or '.2g' whichever is shorter.\n\ncmap : str or matplotlib Colormap, default='viridis'\n Colormap recognized by matplotlib.\n\nax : matplotlib Axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\ncolorbar : bool, default=True\n Whether or not to add a colorbar to the plot.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\nSee Also\n--------\nconfusion_matrix : Compute Confusion Matrix to evaluate the accuracy of a\n classification.\nConfusionMatrixDisplay : Confusion Matrix visualization.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt # doctest: +SKIP\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import plot_confusion_matrix\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... 
X, y, random_state=0)\n>>> clf = SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> plot_confusion_matrix(clf, X_test, y_test) # doctest: +SKIP\n>>> plt.show() # doctest: +SKIP" - } - ] - }, - { - "name": "sklearn.metrics._plot.det_curve", - "imports": [ - "import scipy as sp", - "from base import _get_response", - "from None import det_curve", - "from utils import check_matplotlib_support", - "import matplotlib.pyplot as plt" - ], - "classes": [ - { - "name": "DetCurveDisplay", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "fpr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "False positive rate." - }, - { - "name": "tpr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True positive rate." - }, - { - "name": "estimator_name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of estimator. If None, the estimator name is not shown." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The label of the positive class." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "plot", - "decorators": [], - "parameters": [ - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes object to plot on. If `None`, a new figure and axes is created." - }, - { - "name": "name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of DET curve for labeling. If `None`, use the name of the estimator." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot visualization.\n\nParameters\n----------\nax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\nname : str, default=None\n Name of DET curve for labeling. If `None`, use the name of the\n estimator.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.plot.DetCurveDisplay`\n Object that stores computed values." - } - ], - "docstring": "DET curve visualization.\n\nIt is recommend to use :func:`~sklearn.metrics.plot_det_curve` to create a\nvisualizer. All parameters are stored as attributes.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nfpr : ndarray\n False positive rate.\n\ntpr : ndarray\n True positive rate.\n\nestimator_name : str, default=None\n Name of estimator. If None, the estimator name is not shown.\n\npos_label : str or int, default=None\n The label of the positive class.\n\nAttributes\n----------\nline_ : matplotlib Artist\n DET Curve.\n\nax_ : matplotlib Axes\n Axes with DET Curve.\n\nfigure_ : matplotlib Figure\n Figure containing the curve.\n\nSee Also\n--------\ndet_curve : Compute error rates for different probability thresholds.\nplot_det_curve : Plot detection error tradeoff (DET) curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt # doctest: +SKIP\n>>> import numpy as np\n>>> from sklearn import metrics\n>>> y = np.array([0, 0, 1, 1])\n>>> pred = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, fnr, thresholds = metrics.det_curve(y, pred)\n>>> display = metrics.DetCurveDisplay(\n... fpr=fpr, fnr=fnr, estimator_name='example estimator'\n... 
)\n>>> display.plot() # doctest: +SKIP\n>>> plt.show() # doctest: +SKIP" - } - ], - "functions": [ - { - "name": "plot_det_curve", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline` in which the last estimator is a classifier." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input values." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "response_method", - "type": "Literal['predict_proba', 'decision_function', 'auto']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the predicted target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." - }, - { - "name": "name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of DET curve for labeling. If `None`, use the name of the estimator." - }, - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes object to plot on. If `None`, a new figure and axes is created." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The label of the positive class. 
When `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1}, `pos_label` is set to 1, otherwise an error will be raised." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot detection error tradeoff (DET) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nestimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'} default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the predicted target response. If set to\n 'auto', :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\nname : str, default=None\n Name of DET curve for labeling. If `None`, use the name of the\n estimator.\n\nax : matplotlib axes, default=None\n Axes object to plot on. 
If `None`, a new figure and axes is created.\n\npos_label : str or int, default=None\n The label of the positive class.\n When `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1},\n `pos_label` is set to 1, otherwise an error will be raised.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.DetCurveDisplay`\n Object that stores computed values.\n\nSee Also\n--------\ndet_curve : Compute error rates for different probability thresholds.\nDetCurveDisplay : DET curve visualization.\nplot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt # doctest: +SKIP\n>>> from sklearn import datasets, metrics, model_selection, svm\n>>> X, y = datasets.make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = model_selection.train_test_split(\n... X, y, random_state=0)\n>>> clf = svm.SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> metrics.plot_det_curve(clf, X_test, y_test) # doctest: +SKIP\n>>> plt.show() # doctest: +SKIP" - } - ] - }, - { - "name": "sklearn.metrics._plot.precision_recall_curve", - "imports": [ - "from base import _get_response", - "from None import average_precision_score", - "from None import precision_recall_curve", - "from utils import check_matplotlib_support", - "from utils.validation import _deprecate_positional_args", - "import matplotlib.pyplot as plt" - ], - "classes": [ - { - "name": "PrecisionRecallDisplay", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "precision", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precision values." - }, - { - "name": "recall", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Recall values." 
- }, - { - "name": "average_precision", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Average precision. If None, the average precision is not shown." - }, - { - "name": "estimator_name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of estimator. If None, then the estimator name is not shown." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class considered as the positive class. If None, the class will not be shown in the legend. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "plot", - "decorators": [], - "parameters": [ - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes object to plot on. If `None`, a new figure and axes is created." - }, - { - "name": "name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of precision recall curve for labeling. If `None`, use the name of the estimator." - }, - { - "name": "**kwargs", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Keyword arguments to be passed to matplotlib's `plot`." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot visualization.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nParameters\n----------\nax : Matplotlib Axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\nname : str, default=None\n Name of precision recall curve for labeling. 
If `None`, use the\n name of the estimator.\n\n**kwargs : dict\n Keyword arguments to be passed to matplotlib's `plot`.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.PrecisionRecallDisplay`\n Object that stores computed values." - } - ], - "docstring": "Precision Recall visualization.\n\nIt is recommend to use :func:`~sklearn.metrics.plot_precision_recall_curve`\nto create a visualizer. All parameters are stored as attributes.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n-----------\nprecision : ndarray\n Precision values.\n\nrecall : ndarray\n Recall values.\n\naverage_precision : float, default=None\n Average precision. If None, the average precision is not shown.\n\nestimator_name : str, default=None\n Name of estimator. If None, then the estimator name is not shown.\n\npos_label : str or int, default=None\n The class considered as the positive class. If None, the class will not\n be shown in the legend.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nline_ : matplotlib Artist\n Precision recall curve.\n\nax_ : matplotlib Axes\n Axes with precision recall curve.\n\nfigure_ : matplotlib Figure\n Figure containing the curve.\n\nSee Also\n--------\nprecision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\nplot_precision_recall_curve : Plot Precision Recall Curve for binary\n classifiers.\n\nExamples\n--------\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import (precision_recall_curve,\n... PrecisionRecallDisplay)\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n... 
random_state=0)\n>>> clf = SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> predictions = clf.predict(X_test)\n>>> precision, recall, _ = precision_recall_curve(y_test, predictions)\n>>> disp = PrecisionRecallDisplay(precision=precision, recall=recall)\n>>> disp.plot() # doctest: +SKIP" - } - ], - "functions": [ - { - "name": "plot_precision_recall_curve", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline` in which the last estimator is a classifier." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input values." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Binary target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "response_method", - "type": "Literal['predict_proba', 'decision_function', 'auto']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." - }, - { - "name": "name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name for labeling curve. If `None`, the name of the estimator is used." 
- }, - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes object to plot on. If `None`, a new figure and axes is created." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class considered as the positive class when computing the precision and recall metrics. By default, `estimators.classes_[1]` is considered as the positive class. .. versionadded:: 0.24" - }, - { - "name": "**kwargs", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Keyword arguments to be passed to matplotlib's `plot`." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot Precision Recall Curve for binary classifiers.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\ny : array-like of shape (n_samples,)\n Binary target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'}, default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. If set to 'auto',\n :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\nname : str, default=None\n Name for labeling curve. If `None`, the name of the\n estimator is used.\n\nax : matplotlib axes, default=None\n Axes object to plot on. 
If `None`, a new figure and axes is created.\n\npos_label : str or int, default=None\n The class considered as the positive class when computing the precision\n and recall metrics. By default, `estimators.classes_[1]` is considered\n as the positive class.\n\n .. versionadded:: 0.24\n\n**kwargs : dict\n Keyword arguments to be passed to matplotlib's `plot`.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.PrecisionRecallDisplay`\n Object that stores computed values.\n\nSee Also\n--------\nprecision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\nPrecisionRecallDisplay : Precision Recall visualization." - } - ] - }, - { - "name": "sklearn.metrics._plot.roc_curve", - "imports": [ - "from base import _get_response", - "from None import auc", - "from None import roc_curve", - "from utils import check_matplotlib_support", - "from utils.validation import _deprecate_positional_args", - "import matplotlib.pyplot as plt" - ], - "classes": [ - { - "name": "RocCurveDisplay", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "fpr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "False positive rate." - }, - { - "name": "tpr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True positive rate." - }, - { - "name": "roc_auc", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Area under ROC curve. If None, the roc_auc score is not shown." - }, - { - "name": "estimator_name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of estimator. If None, the estimator name is not shown." 
- }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class considered as the positive class when computing the roc auc metrics. By default, `estimators.classes_[1]` is considered as the positive class. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "plot", - "decorators": [], - "parameters": [ - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes object to plot on. If `None`, a new figure and axes is created." - }, - { - "name": "name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of ROC Curve for labeling. If `None`, use the name of the estimator." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot visualization\n\nExtra keyword arguments will be passed to matplotlib's ``plot``.\n\nParameters\n----------\nax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\nname : str, default=None\n Name of ROC Curve for labeling. If `None`, use the name of the\n estimator.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.plot.RocCurveDisplay`\n Object that stores computed values." - } - ], - "docstring": "ROC Curve visualization.\n\nIt is recommend to use :func:`~sklearn.metrics.plot_roc_curve` to create a\nvisualizer. All parameters are stored as attributes.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfpr : ndarray\n False positive rate.\n\ntpr : ndarray\n True positive rate.\n\nroc_auc : float, default=None\n Area under ROC curve. If None, the roc_auc score is not shown.\n\nestimator_name : str, default=None\n Name of estimator. 
If None, the estimator name is not shown.\n\npos_label : str or int, default=None\n The class considered as the positive class when computing the roc auc\n metrics. By default, `estimators.classes_[1]` is considered\n as the positive class.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nline_ : matplotlib Artist\n ROC Curve.\n\nax_ : matplotlib Axes\n Axes with ROC Curve.\n\nfigure_ : matplotlib Figure\n Figure containing the curve.\n\nSee Also\n--------\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nplot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\nroc_auc_score : Compute the area under the ROC curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt # doctest: +SKIP\n>>> import numpy as np\n>>> from sklearn import metrics\n>>> y = np.array([0, 0, 1, 1])\n>>> pred = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, tpr, thresholds = metrics.roc_curve(y, pred)\n>>> roc_auc = metrics.auc(fpr, tpr)\n>>> display = metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name='example estimator')\n>>> display.plot() # doctest: +SKIP\n>>> plt.show() # doctest: +SKIP" - } - ], - "functions": [ - { - "name": "plot_roc_curve", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline` in which the last estimator is a classifier." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input values." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- }, - { - "name": "drop_intermediate", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to drop some suboptimal thresholds which would not appear on a plotted ROC curve. This is useful in order to create lighter ROC curves." - }, - { - "name": "response_method", - "type": "Literal['predict_proba', 'decision_function', 'auto']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." - }, - { - "name": "name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of ROC Curve for labeling. If `None`, use the name of the estimator." - }, - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes object to plot on. If `None`, a new figure and axes is created." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class considered as the positive class when computing the roc auc metrics. By default, `estimators.classes_[1]` is considered as the positive class. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot Receiver operating characteristic (ROC) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\ndrop_intermediate : boolean, default=True\n Whether to drop some suboptimal thresholds which would not appear\n on a plotted ROC curve. This is useful in order to create lighter\n ROC curves.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'} default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. If set to 'auto',\n :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\nname : str, default=None\n Name of ROC Curve for labeling. If `None`, use the name of the\n estimator.\n\nax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is created.\n\npos_label : str or int, default=None\n The class considered as the positive class when computing the roc auc\n metrics. By default, `estimators.classes_[1]` is considered\n as the positive class.\n\n .. 
versionadded:: 0.24\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.RocCurveDisplay`\n Object that stores computed values.\n\nSee Also\n--------\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nRocCurveDisplay : ROC Curve visualization.\nroc_auc_score : Compute the area under the ROC curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt # doctest: +SKIP\n>>> from sklearn import datasets, metrics, model_selection, svm\n>>> X, y = datasets.make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = model_selection.train_test_split(\n... X, y, random_state=0)\n>>> clf = svm.SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> metrics.plot_roc_curve(clf, X_test, y_test) # doctest: +SKIP\n>>> plt.show() # doctest: +SKIP" - } - ] - }, - { - "name": "sklearn.metrics._plot", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.metrics._plot.tests.test_plot_confusion_matrix", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_allclose", - "from numpy.testing import assert_array_equal", - "from sklearn.compose import make_column_transformer", - "from sklearn.datasets import make_classification", - "from sklearn.exceptions import NotFittedError", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.pipeline import make_pipeline", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.svm import SVC", - "from sklearn.svm import SVR", - "from sklearn.metrics import confusion_matrix", - "from sklearn.metrics import plot_confusion_matrix", - "from sklearn.metrics import ConfusionMatrixDisplay", - "import matplotlib as mpl" - ], - "classes": [], - "functions": [ - { - "name": "n_classes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "fitted_clf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "y_pred", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_error_on_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_error_on_invalid_option", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_confusion_matrix_custom_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_confusion_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_display", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_contrast", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_confusion_matrix_colorbar", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_text_format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_standard_format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_default_labels", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_error_on_a_dataset_with_unseen_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that when labels=None, the unique values in `y_pred` and `y_true`\nwill be used.\nNon-regression test for:\nhttps://github.com/scikit-learn/scikit-learn/pull/18405" - } - ] - }, - { - "name": "sklearn.metrics._plot.tests.test_plot_curve_common", - "imports": [ - "import pytest", - "from sklearn.base import ClassifierMixin", - "from sklearn.base import clone", - "from sklearn.compose import make_column_transformer", - "from sklearn.datasets import load_iris", - "from sklearn.exceptions import NotFittedError", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.pipeline import make_pipeline", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.metrics import plot_det_curve", - "from sklearn.metrics import plot_roc_curve" - ], - "classes": [], - "functions": [ - { - "name": "data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "data_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_curve_error_non_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_curve_error_no_response", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_curve_estimator_name_multiple_calls", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_det_curve_not_fitted_errors", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics._plot.tests.test_plot_det_curve", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_allclose", - "from sklearn.datasets import load_iris", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.metrics import det_curve", - "from sklearn.metrics import plot_det_curve", - "import matplotlib as mpl" - ], - "classes": [], - "functions": [ - { - "name": "data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "data_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_det_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics._plot.tests.test_plot_precision_recall", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_allclose", - "from sklearn.base import BaseEstimator", - "from sklearn.base import ClassifierMixin", - "from sklearn.metrics import plot_precision_recall_curve", - "from sklearn.metrics import PrecisionRecallDisplay", - "from sklearn.metrics import average_precision_score", - "from sklearn.metrics import precision_recall_curve", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import load_breast_cancer", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.model_selection import train_test_split", - "from sklearn.exceptions import NotFittedError", - "from sklearn.pipeline import make_pipeline", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.utils import shuffle", - "from 
sklearn.compose import make_column_transformer", - "import matplotlib as mpl" - ], - "classes": [], - "functions": [ - { - "name": "test_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_error_bad_response", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_precision_recall", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_curve_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_curve_string_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_precision_recall_curve_estimator_name_multiple_calls", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_default_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_precision_recall_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics._plot.tests.test_plot_roc_curve", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_allclose", - "from sklearn.metrics import plot_roc_curve", - "from sklearn.metrics import RocCurveDisplay", - "from sklearn.metrics import roc_curve", - "from sklearn.metrics import auc", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import load_breast_cancer", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.model_selection import train_test_split", - 
"from sklearn.exceptions import NotFittedError", - "from sklearn.pipeline import make_pipeline", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.utils import shuffle", - "from sklearn.compose import make_column_transformer", - "import matplotlib as mpl" - ], - "classes": [], - "functions": [ - { - "name": "data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "data_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_roc_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_not_fitted_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_default_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_roc_curve_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics._plot.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.mixture._base", - "imports": [ - "import warnings", - "from abc import ABCMeta", - "from abc import abstractmethod", - "from time import time", - "import numpy as np", - "from scipy.special import logsumexp", - "from None import cluster", - "from base import BaseEstimator", - "from base import DensityMixin", - "from exceptions import ConvergenceWarning", - "from utils import check_array", - "from utils import check_random_state", - "from utils.validation import check_is_fitted" - ], - "classes": [ - { - "name": "BaseMixture", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_initial_parameters", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check values of the basic parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)" - }, - { - "name": "_check_parameters", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check initial parameters of the derived class.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)" - }, - { - "name": "_initialize_parameters", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "random_state", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A random number generator instance that controls the random seed used for the method chosen to initialize the parameters." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialize the model parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nrandom_state : RandomState\n A random number generator instance that controls the random seed\n used for the method chosen to initialize the parameters." 
- }, - { - "name": "_initialize", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialize the model parameters of the derived class.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nresp : array-like of shape (n_samples, n_components)" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of n_features-dimensional data points. Each row corresponds to a single data point." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Estimate model parameters with the EM algorithm.\n\nThe method fits the model ``n_init`` times and sets the parameters with\nwhich the model has the largest likelihood or lower bound. Within each\ntrial, the method iterates between E-step and M-step for ``max_iter``\ntimes until the change of likelihood or lower bound is less than\n``tol``, otherwise, a ``ConvergenceWarning`` is raised.\nIf ``warm_start`` is ``True``, then ``n_init`` is ignored and a single\ninitialization is performed upon the first call. Upon consecutive\ncalls, training starts where it left off.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. 
Each row\n corresponds to a single data point.\n\nReturns\n-------\nself" - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of n_features-dimensional data points. Each row corresponds to a single data point." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate model parameters using X and predict the labels for X.\n\nThe method fits the model n_init times and sets the parameters with\nwhich the model has the largest likelihood or lower bound. Within each\ntrial, the method iterates between E-step and M-step for `max_iter`\ntimes until the change of likelihood or lower bound is less than\n`tol`, otherwise, a :class:`~sklearn.exceptions.ConvergenceWarning` is\nraised. After fitting, it predicts the most probable label for the\ninput data points.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\nReturns\n-------\nlabels : array, shape (n_samples,)\n Component labels." - }, - { - "name": "_e_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "E step.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nlog_prob_norm : float\n Mean of the logarithms of the probabilities of each sample in X\n\nlog_responsibility : array, shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X." 
- }, - { - "name": "_m_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "log_resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Logarithm of the posterior probabilities (or responsibilities) of the point of each sample in X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "M step.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nlog_resp : array-like of shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X." - }, - { - "name": "_get_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of n_features-dimensional data points. Each row corresponds to a single data point." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the weighted log probabilities for each sample.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\nReturns\n-------\nlog_prob : array, shape (n_samples,)\n Log probabilities of each data point in X." 
- }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of n_features-dimensional data points. Each row corresponds to a single data point." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the per-sample average log-likelihood of the given data X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_dimensions)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\nReturns\n-------\nlog_likelihood : float\n Log likelihood of the Gaussian mixture given X." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of n_features-dimensional data points. Each row corresponds to a single data point." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the labels for the data samples in X using trained model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\nReturns\n-------\nlabels : array, shape (n_samples,)\n Component labels." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of n_features-dimensional data points. Each row corresponds to a single data point." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict posterior probability of each component given the data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. 
Each row\n corresponds to a single data point.\n\nReturns\n-------\nresp : array, shape (n_samples, n_components)\n Returns the probability each Gaussian (state) in\n the model given each sample." - }, - { - "name": "sample", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to generate." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate random samples from the fitted Gaussian distribution.\n\nParameters\n----------\nn_samples : int, default=1\n Number of samples to generate.\n\nReturns\n-------\nX : array, shape (n_samples, n_features)\n Randomly generated sample\n\ny : array, shape (nsamples,)\n Component labels" - }, - { - "name": "_estimate_weighted_log_prob", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the weighted log-probabilities, log P(X | Z) + log weights.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nweighted_log_prob : array, shape (n_samples, n_component)" - }, - { - "name": "_estimate_log_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate log-weights in EM algorithm, E[ log pi ] in VB algorithm.\n\nReturns\n-------\nlog_weight : array, shape (n_components, )" - }, - { - "name": "_estimate_log_prob", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the log-probabilities log P(X | Z).\n\nCompute the log-probabilities per 
each component for each sample.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nlog_prob : array, shape (n_samples, n_component)" - }, - { - "name": "_estimate_log_prob_resp", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate log probabilities and responsibilities for each sample.\n\nCompute the log probabilities, weighted log probabilities per\ncomponent and responsibilities for each sample in X with respect to\nthe current state of the model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nlog_prob_norm : array, shape (n_samples,)\n log p(X)\n\nlog_responsibilities : array, shape (n_samples, n_components)\n logarithm of the responsibilities" - }, - { - "name": "_print_verbose_msg_init_beg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Print verbose message on initialization." - }, - { - "name": "_print_verbose_msg_iter_end", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Print verbose message on initialization." - }, - { - "name": "_print_verbose_msg_init_end", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Print verbose message on the end of iteration." - } - ], - "docstring": "Base class for mixture models.\n\nThis abstract class specifies an interface for all mixture classes and\nprovides basic common methods for mixture models." 
- } - ], - "functions": [ - { - "name": "_check_shape", - "decorators": [], - "parameters": [ - { - "name": "param", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "param_shape", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate the shape of the input parameter 'param'.\n\nParameters\n----------\nparam : array\n\nparam_shape : tuple\n\nname : string" - }, - { - "name": "_check_X", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the input data X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nn_components : int\n\nReturns\n-------\nX : array, shape (n_samples, n_features)" - } - ] - }, - { - "name": "sklearn.mixture._bayesian_mixture", - "imports": [ - "import math", - "import numpy as np", - "from scipy.special import betaln", - "from scipy.special import digamma", - "from scipy.special import gammaln", - "from _base import BaseMixture", - "from _base import _check_shape", - "from _gaussian_mixture import _check_precision_matrix", - "from _gaussian_mixture import _check_precision_positivity", - "from _gaussian_mixture import _compute_log_det_cholesky", - "from _gaussian_mixture import _compute_precision_cholesky", - "from _gaussian_mixture import 
_estimate_gaussian_parameters", - "from _gaussian_mixture import _estimate_log_gaussian_prob", - "from utils import check_array", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "BayesianGaussianMixture", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of mixture components. Depending on the data and the value of the `weight_concentration_prior` the model can decide to not use all the components by setting some component `weights_` to values very close to zero. The number of effective components is therefore smaller than n_components." - }, - { - "name": "covariance_type", - "type": "Literal['full', 'tied', 'diag', 'spherical']", - "hasDefault": true, - "default": "'full'", - "limitation": null, - "ignored": false, - "docstring": "String describing the type of covariance parameters to use. Must be one of:: 'full' (each component has its own general covariance matrix), 'tied' (all components share the same general covariance matrix), 'diag' (each component has its own diagonal covariance matrix), 'spherical' (each component has its own single variance)." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "The convergence threshold. EM iterations will stop when the lower bound average gain on the likelihood (of the training data with respect to the model) is below this threshold." - }, - { - "name": "reg_covar", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Non-negative regularization added to the diagonal of covariance. Allows to assure that the covariance matrices are all positive." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of EM iterations to perform." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of initializations to perform. The result with the highest lower bound value on the likelihood is kept." - }, - { - "name": "init_params", - "type": "Literal['kmeans', 'random']", - "hasDefault": true, - "default": "'kmeans'", - "limitation": null, - "ignored": false, - "docstring": "The method used to initialize the weights, the means and the covariances. Must be one of:: 'kmeans' : responsibilities are initialized using kmeans. 'random' : responsibilities are initialized randomly." - }, - { - "name": "weight_concentration_prior_type", - "type": "str", - "hasDefault": true, - "default": "'dirichlet_process'", - "limitation": null, - "ignored": false, - "docstring": "String describing the type of the weight concentration prior. Must be one of:: 'dirichlet_process' (using the Stick-breaking representation), 'dirichlet_distribution' (can favor more uniform weights)." - }, - { - "name": "weight_concentration_prior", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dirichlet concentration of each component on the weight distribution (Dirichlet). This is commonly called gamma in the literature. The higher concentration puts more mass in the center and will lead to more components being active, while a lower concentration parameter will lead to more mass at the edge of the mixture weights simplex. The value of the parameter must be greater than 0. If it is None, it's set to ``1. / n_components``." 
- }, - { - "name": "mean_precision_prior", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The precision prior on the mean distribution (Gaussian). Controls the extent of where means can be placed. Larger values concentrate the cluster means around `mean_prior`. The value of the parameter must be greater than 0. If it is None, it is set to 1." - }, - { - "name": "mean_prior", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The prior on the mean distribution (Gaussian). If it is None, it is set to the mean of X." - }, - { - "name": "degrees_of_freedom_prior", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The prior of the number of degrees of freedom on the covariance distributions (Wishart). If it is None, it's set to `n_features`." - }, - { - "name": "covariance_prior", - "type": "Union[ArrayLike, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The prior on the covariance distribution (Wishart). If it is None, the emiprical covariance prior is initialized using the covariance of X. The shape depends on `covariance_type`:: (n_features, n_features) if 'full', (n_features, n_features) if 'tied', (n_features) if 'diag', float if 'spherical'" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the random seed given to the method chosen to initialize the parameters (see `init_params`). In addition, it controls the generation of random samples from the fitted distribution (see the method `sample`). Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If 'warm_start' is True, the solution of the last fitting is used as initialization for the next call of fit(). This can speed up convergence when fit is called several times on similar problems. See :term:`the Glossary `." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. If 1 then it prints the current initialization and each iteration step. If greater than 1 then it prints also the log probability and the time needed for each step." - }, - { - "name": "verbose_interval", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of iteration done before the next print." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_parameters", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that the parameters are well defined.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)" - }, - { - "name": "_check_weights_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the parameter of the Dirichlet distribution." 
- }, - { - "name": "_check_means_parameters", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the parameters of the Gaussian distribution.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)" - }, - { - "name": "_check_precision_parameters", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the prior parameters of the precision distribution.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)" - }, - { - "name": "_checkcovariance_prior_parameter", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the `covariance_prior_`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)" - }, - { - "name": "_initialize", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialization of the mixture parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nresp : array-like of shape (n_samples, n_components)" - }, - { - "name": "_estimate_weights", - "decorators": [], - "parameters": [ - { - 
"name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the parameters of the Dirichlet distribution.\n\nParameters\n----------\nnk : array-like of shape (n_components,)" - }, - { - "name": "_estimate_means", - "decorators": [], - "parameters": [ - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "xk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the parameters of the Gaussian distribution.\n\nParameters\n----------\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, n_features)" - }, - { - "name": "_estimate_precisions", - "decorators": [], - "parameters": [ - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "xk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "sk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The shape depends of `covariance_type`: 'full' : (n_components, n_features, n_features) 'tied' : (n_features, n_features) 'diag' : (n_components, n_features) 'spherical' : (n_components,)" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the precisions parameters of the precision distribution.\n\nParameters\n----------\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, n_features)\n\nsk : array-like\n The shape depends of 
`covariance_type`:\n 'full' : (n_components, n_features, n_features)\n 'tied' : (n_features, n_features)\n 'diag' : (n_components, n_features)\n 'spherical' : (n_components,)" - }, - { - "name": "_estimate_wishart_full", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "xk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "sk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the full Wishart distribution parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, n_features)\n\nsk : array-like of shape (n_components, n_features, n_features)" - }, - { - "name": "_estimate_wishart_tied", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "xk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "sk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the 
tied Wishart distribution parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, n_features)\n\nsk : array-like of shape (n_features, n_features)" - }, - { - "name": "_estimate_wishart_diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "xk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "sk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the diag Wishart distribution parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, n_features)\n\nsk : array-like of shape (n_components, n_features)" - }, - { - "name": "_estimate_wishart_spherical", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "xk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "sk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } 
- ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the spherical Wishart distribution parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, n_features)\n\nsk : array-like of shape (n_components,)" - }, - { - "name": "_m_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "log_resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Logarithm of the posterior probabilities (or responsibilities) of the point of each sample in X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "M step.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nlog_resp : array-like of shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X." - }, - { - "name": "_estimate_log_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_estimate_log_prob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_compute_lower_bound", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "log_resp", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Logarithm of the posterior probabilities (or responsibilities) of the point of each sample in X." 
- }, - { - "name": "log_prob_norm", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Logarithm of the probability of each sample in X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the lower bound of the model.\n\nThe lower bound on the likelihood (of the training data with respect to\nthe model) is used to detect the convergence and has to increase at\neach iteration.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nlog_resp : array, shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X.\n\nlog_prob_norm : float\n Logarithm of the probability of each sample in X.\n\nReturns\n-------\nlower_bound : float" - }, - { - "name": "_get_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Variational Bayesian estimation of a Gaussian mixture.\n\nThis class allows to infer an approximate posterior distribution over the\nparameters of a Gaussian mixture distribution. The effective number of\ncomponents can be inferred from the data.\n\nThis class implements two types of prior for the weights distribution: a\nfinite mixture model with Dirichlet distribution and an infinite mixture\nmodel with the Dirichlet Process. In practice Dirichlet Process inference\nalgorithm is approximated and uses a truncated distribution with a fixed\nmaximum number of components (called the Stick-breaking representation).\nThe number of components actually used almost always depends on the data.\n\n.. 
versionadded:: 0.18\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=1\n The number of mixture components. Depending on the data and the value\n of the `weight_concentration_prior` the model can decide to not use\n all the components by setting some component `weights_` to values very\n close to zero. The number of effective components is therefore smaller\n than n_components.\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'\n String describing the type of covariance parameters to use.\n Must be one of::\n\n 'full' (each component has its own general covariance matrix),\n 'tied' (all components share the same general covariance matrix),\n 'diag' (each component has its own diagonal covariance matrix),\n 'spherical' (each component has its own single variance).\n\ntol : float, default=1e-3\n The convergence threshold. EM iterations will stop when the\n lower bound average gain on the likelihood (of the training data with\n respect to the model) is below this threshold.\n\nreg_covar : float, default=1e-6\n Non-negative regularization added to the diagonal of covariance.\n Allows to assure that the covariance matrices are all positive.\n\nmax_iter : int, default=100\n The number of EM iterations to perform.\n\nn_init : int, default=1\n The number of initializations to perform. 
The result with the highest\n lower bound value on the likelihood is kept.\n\ninit_params : {'kmeans', 'random'}, default='kmeans'\n The method used to initialize the weights, the means and the\n covariances.\n Must be one of::\n\n 'kmeans' : responsibilities are initialized using kmeans.\n 'random' : responsibilities are initialized randomly.\n\nweight_concentration_prior_type : str, default='dirichlet_process'\n String describing the type of the weight concentration prior.\n Must be one of::\n\n 'dirichlet_process' (using the Stick-breaking representation),\n 'dirichlet_distribution' (can favor more uniform weights).\n\nweight_concentration_prior : float | None, default=None.\n The dirichlet concentration of each component on the weight\n distribution (Dirichlet). This is commonly called gamma in the\n literature. The higher concentration puts more mass in\n the center and will lead to more components being active, while a lower\n concentration parameter will lead to more mass at the edge of the\n mixture weights simplex. The value of the parameter must be greater\n than 0. If it is None, it's set to ``1. / n_components``.\n\nmean_precision_prior : float | None, default=None.\n The precision prior on the mean distribution (Gaussian).\n Controls the extent of where means can be placed. Larger\n values concentrate the cluster means around `mean_prior`.\n The value of the parameter must be greater than 0.\n If it is None, it is set to 1.\n\nmean_prior : array-like, shape (n_features,), default=None.\n The prior on the mean distribution (Gaussian).\n If it is None, it is set to the mean of X.\n\ndegrees_of_freedom_prior : float | None, default=None.\n The prior of the number of degrees of freedom on the covariance\n distributions (Wishart). 
If it is None, it's set to `n_features`.\n\ncovariance_prior : float or array-like, default=None.\n The prior on the covariance distribution (Wishart).\n If it is None, the emiprical covariance prior is initialized using the\n covariance of X. The shape depends on `covariance_type`::\n\n (n_features, n_features) if 'full',\n (n_features, n_features) if 'tied',\n (n_features) if 'diag',\n float if 'spherical'\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random seed given to the method chosen to initialize the\n parameters (see `init_params`).\n In addition, it controls the generation of random samples from the\n fitted distribution (see the method `sample`).\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nwarm_start : bool, default=False\n If 'warm_start' is True, the solution of the last fitting is used as\n initialization for the next call of fit(). This can speed up\n convergence when fit is called several times on similar problems.\n See :term:`the Glossary `.\n\nverbose : int, default=0\n Enable verbose output. If 1 then it prints the current\n initialization and each iteration step. If greater than 1 then\n it prints also the log probability and the time needed\n for each step.\n\nverbose_interval : int, default=10\n Number of iteration done before the next print.\n\nAttributes\n----------\nweights_ : array-like of shape (n_components,)\n The weights of each mixture components.\n\nmeans_ : array-like of shape (n_components, n_features)\n The mean of each mixture component.\n\ncovariances_ : array-like\n The covariance of each mixture component.\n The shape depends on `covariance_type`::\n\n (n_components,) if 'spherical',\n (n_features, n_features) if 'tied',\n (n_components, n_features) if 'diag',\n (n_components, n_features, n_features) if 'full'\n\nprecisions_ : array-like\n The precision matrices for each component in the mixture. 
A precision\n matrix is the inverse of a covariance matrix. A covariance matrix is\n symmetric positive definite so the mixture of Gaussian can be\n equivalently parameterized by the precision matrices. Storing the\n precision matrices instead of the covariance matrices makes it more\n efficient to compute the log-likelihood of new samples at test time.\n The shape depends on ``covariance_type``::\n\n (n_components,) if 'spherical',\n (n_features, n_features) if 'tied',\n (n_components, n_features) if 'diag',\n (n_components, n_features, n_features) if 'full'\n\nprecisions_cholesky_ : array-like\n The cholesky decomposition of the precision matrices of each mixture\n component. A precision matrix is the inverse of a covariance matrix.\n A covariance matrix is symmetric positive definite so the mixture of\n Gaussian can be equivalently parameterized by the precision matrices.\n Storing the precision matrices instead of the covariance matrices makes\n it more efficient to compute the log-likelihood of new samples at test\n time. The shape depends on ``covariance_type``::\n\n (n_components,) if 'spherical',\n (n_features, n_features) if 'tied',\n (n_components, n_features) if 'diag',\n (n_components, n_features, n_features) if 'full'\n\nconverged_ : bool\n True when convergence was reached in fit(), False otherwise.\n\nn_iter_ : int\n Number of step used by the best fit of inference to reach the\n convergence.\n\nlower_bound_ : float\n Lower bound value on the likelihood (of the training data with\n respect to the model) of the best fit of inference.\n\nweight_concentration_prior_ : tuple or float\n The dirichlet concentration of each component on the weight\n distribution (Dirichlet). 
The type depends on\n ``weight_concentration_prior_type``::\n\n (float, float) if 'dirichlet_process' (Beta parameters),\n float if 'dirichlet_distribution' (Dirichlet parameters).\n\n The higher concentration puts more mass in\n the center and will lead to more components being active, while a lower\n concentration parameter will lead to more mass at the edge of the\n simplex.\n\nweight_concentration_ : array-like of shape (n_components,)\n The dirichlet concentration of each component on the weight\n distribution (Dirichlet).\n\nmean_precision_prior_ : float\n The precision prior on the mean distribution (Gaussian).\n Controls the extent of where means can be placed.\n Larger values concentrate the cluster means around `mean_prior`.\n If mean_precision_prior is set to None, `mean_precision_prior_` is set\n to 1.\n\nmean_precision_ : array-like of shape (n_components,)\n The precision of each components on the mean distribution (Gaussian).\n\nmean_prior_ : array-like of shape (n_features,)\n The prior on the mean distribution (Gaussian).\n\ndegrees_of_freedom_prior_ : float\n The prior of the number of degrees of freedom on the covariance\n distributions (Wishart).\n\ndegrees_of_freedom_ : array-like of shape (n_components,)\n The number of degrees of freedom of each components in the model.\n\ncovariance_prior_ : float or array-like\n The prior on the covariance distribution (Wishart).\n The shape depends on `covariance_type`::\n\n (n_features, n_features) if 'full',\n (n_features, n_features) if 'tied',\n (n_features) if 'diag',\n float if 'spherical'\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.mixture import BayesianGaussianMixture\n>>> X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [12, 4], [10, 7]])\n>>> bgm = BayesianGaussianMixture(n_components=2, random_state=42).fit(X)\n>>> bgm.means_\narray([[2.49... , 2.29...],\n [8.45..., 4.52... 
]])\n>>> bgm.predict([[0, 0], [9, 3]])\narray([0, 1])\n\nSee Also\n--------\nGaussianMixture : Finite Gaussian mixture fit with EM.\n\nReferences\n----------\n\n.. [1] `Bishop, Christopher M. (2006). \"Pattern recognition and machine\n learning\". Vol. 4 No. 4. New York: Springer.\n `_\n\n.. [2] `Hagai Attias. (2000). \"A Variational Bayesian Framework for\n Graphical Models\". In Advances in Neural Information Processing\n Systems 12.\n `_\n\n.. [3] `Blei, David M. and Michael I. Jordan. (2006). \"Variational\n inference for Dirichlet process mixtures\". Bayesian analysis 1.1\n `_" - } - ], - "functions": [ - { - "name": "_log_dirichlet_norm", - "decorators": [], - "parameters": [ - { - "name": "dirichlet_concentration", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The parameters values of the Dirichlet distribution." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the log of the Dirichlet distribution normalization term.\n\nParameters\n----------\ndirichlet_concentration : array-like of shape (n_samples,)\n The parameters values of the Dirichlet distribution.\n\nReturns\n-------\nlog_dirichlet_norm : float\n The log normalization of the Dirichlet distribution." - }, - { - "name": "_log_wishart_norm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the log of the Wishart distribution normalization term.\n\nParameters\n----------\ndegrees_of_freedom : array-like of shape (n_components,)\n The number of degrees of freedom on the covariance Wishart\n distributions.\n\nlog_det_precision_chol : array-like of shape (n_components,)\n The determinant of the precision matrix for each component.\n\nn_features : int\n The number of features.\n\nReturn\n------\nlog_wishart_norm : array-like of shape (n_components,)\n The log normalization of the Wishart distribution." 
- } - ] - }, - { - "name": "sklearn.mixture._gaussian_mixture", - "imports": [ - "import numpy as np", - "from scipy import linalg", - "from _base import BaseMixture", - "from _base import _check_shape", - "from utils import check_array", - "from utils.extmath import row_norms", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "GaussianMixture", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of mixture components." - }, - { - "name": "covariance_type", - "type": "Literal['full', 'tied', 'diag', 'spherical']", - "hasDefault": true, - "default": "'full'", - "limitation": null, - "ignored": false, - "docstring": "String describing the type of covariance parameters to use. Must be one of: 'full' each component has its own general covariance matrix 'tied' all components share the same general covariance matrix 'diag' each component has its own diagonal covariance matrix 'spherical' each component has its own single variance" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "The convergence threshold. EM iterations will stop when the lower bound average gain is below this threshold." - }, - { - "name": "reg_covar", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Non-negative regularization added to the diagonal of covariance. Allows to assure that the covariance matrices are all positive." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of EM iterations to perform." 
- }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of initializations to perform. The best results are kept." - }, - { - "name": "init_params", - "type": "Literal['kmeans', 'random']", - "hasDefault": true, - "default": "'kmeans'", - "limitation": null, - "ignored": false, - "docstring": "The method used to initialize the weights, the means and the precisions. Must be one of:: 'kmeans' : responsibilities are initialized using kmeans. 'random' : responsibilities are initialized randomly." - }, - { - "name": "weights_init", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The user-provided initial weights. If it is None, weights are initialized using the `init_params` method." - }, - { - "name": "means_init", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The user-provided initial means, If it is None, means are initialized using the `init_params` method." - }, - { - "name": "precisions_init", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The user-provided initial precisions (inverse of the covariance matrices). If it is None, precisions are initialized using the 'init_params' method. The shape depends on 'covariance_type':: (n_components,) if 'spherical', (n_features, n_features) if 'tied', (n_components, n_features) if 'diag', (n_components, n_features, n_features) if 'full'" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the random seed given to the method chosen to initialize the parameters (see `init_params`). 
In addition, it controls the generation of random samples from the fitted distribution (see the method `sample`). Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If 'warm_start' is True, the solution of the last fitting is used as initialization for the next call of fit(). This can speed up convergence when fit is called several times on similar problems. In that case, 'n_init' is ignored and only a single initialization occurs upon the first call. See :term:`the Glossary `." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. If 1 then it prints the current initialization and each iteration step. If greater than 1 then it prints also the log probability and the time needed for each step." - }, - { - "name": "verbose_interval", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of iteration done before the next print." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the Gaussian mixture parameters are well defined." 
- }, - { - "name": "_initialize", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialization of the Gaussian mixture parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nresp : array-like of shape (n_samples, n_components)" - }, - { - "name": "_m_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "log_resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Logarithm of the posterior probabilities (or responsibilities) of the point of each sample in X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "M step.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nlog_resp : array-like of shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X." 
- }, - { - "name": "_estimate_log_prob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_estimate_log_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_compute_lower_bound", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_n_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the number of free parameters in the model." - }, - { - "name": "bic", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Bayesian information criterion for the current model on the input X.\n\nParameters\n----------\nX : array of shape (n_samples, n_dimensions)\n\nReturns\n-------\nbic : float\n The lower the better." - }, - { - "name": "aic", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Akaike information criterion for the current model on the input X.\n\nParameters\n----------\nX : array of shape (n_samples, n_dimensions)\n\nReturns\n-------\naic : float\n The lower the better." 
- } - ], - "docstring": "Gaussian Mixture.\n\nRepresentation of a Gaussian mixture model probability distribution.\nThis class allows to estimate the parameters of a Gaussian mixture\ndistribution.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nn_components : int, default=1\n The number of mixture components.\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'\n String describing the type of covariance parameters to use.\n Must be one of:\n\n 'full'\n each component has its own general covariance matrix\n 'tied'\n all components share the same general covariance matrix\n 'diag'\n each component has its own diagonal covariance matrix\n 'spherical'\n each component has its own single variance\n\ntol : float, default=1e-3\n The convergence threshold. EM iterations will stop when the\n lower bound average gain is below this threshold.\n\nreg_covar : float, default=1e-6\n Non-negative regularization added to the diagonal of covariance.\n Allows to assure that the covariance matrices are all positive.\n\nmax_iter : int, default=100\n The number of EM iterations to perform.\n\nn_init : int, default=1\n The number of initializations to perform. 
The best results are kept.\n\ninit_params : {'kmeans', 'random'}, default='kmeans'\n The method used to initialize the weights, the means and the\n precisions.\n Must be one of::\n\n 'kmeans' : responsibilities are initialized using kmeans.\n 'random' : responsibilities are initialized randomly.\n\nweights_init : array-like of shape (n_components, ), default=None\n The user-provided initial weights.\n If it is None, weights are initialized using the `init_params` method.\n\nmeans_init : array-like of shape (n_components, n_features), default=None\n The user-provided initial means,\n If it is None, means are initialized using the `init_params` method.\n\nprecisions_init : array-like, default=None\n The user-provided initial precisions (inverse of the covariance\n matrices).\n If it is None, precisions are initialized using the 'init_params'\n method.\n The shape depends on 'covariance_type'::\n\n (n_components,) if 'spherical',\n (n_features, n_features) if 'tied',\n (n_components, n_features) if 'diag',\n (n_components, n_features, n_features) if 'full'\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random seed given to the method chosen to initialize the\n parameters (see `init_params`).\n In addition, it controls the generation of random samples from the\n fitted distribution (see the method `sample`).\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nwarm_start : bool, default=False\n If 'warm_start' is True, the solution of the last fitting is used as\n initialization for the next call of fit(). This can speed up\n convergence when fit is called several times on similar problems.\n In that case, 'n_init' is ignored and only a single initialization\n occurs upon the first call.\n See :term:`the Glossary `.\n\nverbose : int, default=0\n Enable verbose output. If 1 then it prints the current\n initialization and each iteration step. 
If greater than 1 then\n it prints also the log probability and the time needed\n for each step.\n\nverbose_interval : int, default=10\n Number of iteration done before the next print.\n\nAttributes\n----------\nweights_ : array-like of shape (n_components,)\n The weights of each mixture components.\n\nmeans_ : array-like of shape (n_components, n_features)\n The mean of each mixture component.\n\ncovariances_ : array-like\n The covariance of each mixture component.\n The shape depends on `covariance_type`::\n\n (n_components,) if 'spherical',\n (n_features, n_features) if 'tied',\n (n_components, n_features) if 'diag',\n (n_components, n_features, n_features) if 'full'\n\nprecisions_ : array-like\n The precision matrices for each component in the mixture. A precision\n matrix is the inverse of a covariance matrix. A covariance matrix is\n symmetric positive definite so the mixture of Gaussian can be\n equivalently parameterized by the precision matrices. Storing the\n precision matrices instead of the covariance matrices makes it more\n efficient to compute the log-likelihood of new samples at test time.\n The shape depends on `covariance_type`::\n\n (n_components,) if 'spherical',\n (n_features, n_features) if 'tied',\n (n_components, n_features) if 'diag',\n (n_components, n_features, n_features) if 'full'\n\nprecisions_cholesky_ : array-like\n The cholesky decomposition of the precision matrices of each mixture\n component. A precision matrix is the inverse of a covariance matrix.\n A covariance matrix is symmetric positive definite so the mixture of\n Gaussian can be equivalently parameterized by the precision matrices.\n Storing the precision matrices instead of the covariance matrices makes\n it more efficient to compute the log-likelihood of new samples at test\n time. 
The shape depends on `covariance_type`::\n\n (n_components,) if 'spherical',\n (n_features, n_features) if 'tied',\n (n_components, n_features) if 'diag',\n (n_components, n_features, n_features) if 'full'\n\nconverged_ : bool\n True when convergence was reached in fit(), False otherwise.\n\nn_iter_ : int\n Number of step used by the best fit of EM to reach the convergence.\n\nlower_bound_ : float\n Lower bound value on the log-likelihood (of the training data with\n respect to the model) of the best fit of EM.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.mixture import GaussianMixture\n>>> X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])\n>>> gm = GaussianMixture(n_components=2, random_state=0).fit(X)\n>>> gm.means_\narray([[10., 2.],\n [ 1., 2.]])\n>>> gm.predict([[0, 0], [12, 3]])\narray([1, 0])\n\nSee Also\n--------\nBayesianGaussianMixture : Gaussian mixture model fit with a variational\n inference." - } - ], - "functions": [ - { - "name": "_check_weights", - "decorators": [], - "parameters": [ - { - "name": "weights", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The proportions of components of each mixture." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the user provided 'weights'.\n\nParameters\n----------\nweights : array-like of shape (n_components,)\n The proportions of components of each mixture.\n\nn_components : int\n Number of components.\n\nReturns\n-------\nweights : array, shape (n_components,)" - }, - { - "name": "_check_means", - "decorators": [], - "parameters": [ - { - "name": "means", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The centers of the current components." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Validate the provided 'means'.\n\nParameters\n----------\nmeans : array-like of shape (n_components, n_features)\n The centers of the current components.\n\nn_components : int\n Number of components.\n\nn_features : int\n Number of features.\n\nReturns\n-------\nmeans : array, (n_components, n_features)" - }, - { - "name": "_check_precision_positivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check a precision vector is positive-definite." - }, - { - "name": "_check_precision_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check a precision matrix is symmetric and positive-definite." - }, - { - "name": "_check_precisions_full", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the precision matrices are symmetric and positive-definite." 
- }, - { - "name": "_check_precisions", - "decorators": [], - "parameters": [ - { - "name": "precisions", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "'full' : shape of (n_components, n_features, n_features) 'tied' : shape of (n_features, n_features) 'diag' : shape of (n_components, n_features) 'spherical' : shape of (n_components,)" - }, - { - "name": "covariance_type", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate user provided precisions.\n\nParameters\n----------\nprecisions : array-like\n 'full' : shape of (n_components, n_features, n_features)\n 'tied' : shape of (n_features, n_features)\n 'diag' : shape of (n_components, n_features)\n 'spherical' : shape of (n_components,)\n\ncovariance_type : string\n\nn_components : int\n Number of components.\n\nn_features : int\n Number of features.\n\nReturns\n-------\nprecisions : array" - }, - { - "name": "_estimate_gaussian_covariances_full", - "decorators": [], - "parameters": [ - { - "name": "resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - 
"name": "means", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "reg_covar", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the full covariance matrices.\n\nParameters\n----------\nresp : array-like of shape (n_samples, n_components)\n\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nmeans : array-like of shape (n_components, n_features)\n\nreg_covar : float\n\nReturns\n-------\ncovariances : array, shape (n_components, n_features, n_features)\n The covariance matrix of the current components." - }, - { - "name": "_estimate_gaussian_covariances_tied", - "decorators": [], - "parameters": [ - { - "name": "resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "means", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "reg_covar", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the tied covariance matrix.\n\nParameters\n----------\nresp : array-like of shape (n_samples, n_components)\n\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nmeans : array-like of shape (n_components, 
n_features)\n\nreg_covar : float\n\nReturns\n-------\ncovariance : array, shape (n_features, n_features)\n The tied covariance matrix of the components." - }, - { - "name": "_estimate_gaussian_covariances_diag", - "decorators": [], - "parameters": [ - { - "name": "responsibilities", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "means", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "reg_covar", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the diagonal covariance vectors.\n\nParameters\n----------\nresponsibilities : array-like of shape (n_samples, n_components)\n\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nmeans : array-like of shape (n_components, n_features)\n\nreg_covar : float\n\nReturns\n-------\ncovariances : array, shape (n_components, n_features)\n The covariance vector of the current components." 
- }, - { - "name": "_estimate_gaussian_covariances_spherical", - "decorators": [], - "parameters": [ - { - "name": "responsibilities", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "means", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "reg_covar", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the spherical variance values.\n\nParameters\n----------\nresponsibilities : array-like of shape (n_samples, n_components)\n\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nmeans : array-like of shape (n_components, n_features)\n\nreg_covar : float\n\nReturns\n-------\nvariances : array, shape (n_components,)\n The variance values of each components." - }, - { - "name": "_estimate_gaussian_parameters", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data array." - }, - { - "name": "resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The responsibilities for each data sample in X." 
- }, - { - "name": "reg_covar", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The regularization added to the diagonal of the covariance matrices." - }, - { - "name": "covariance_type", - "type": "Literal['full', 'tied', 'diag', 'spherical']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The type of precision matrices." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the Gaussian distribution parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data array.\n\nresp : array-like of shape (n_samples, n_components)\n The responsibilities for each data sample in X.\n\nreg_covar : float\n The regularization added to the diagonal of the covariance matrices.\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}\n The type of precision matrices.\n\nReturns\n-------\nnk : array-like of shape (n_components,)\n The numbers of data samples in the current components.\n\nmeans : array-like of shape (n_components, n_features)\n The centers of the current components.\n\ncovariances : array-like\n The covariance matrix of the current components.\n The shape depends of the covariance_type." - }, - { - "name": "_compute_precision_cholesky", - "decorators": [], - "parameters": [ - { - "name": "covariances", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The covariance matrix of the current components. The shape depends of the covariance_type." - }, - { - "name": "covariance_type", - "type": "Literal['full', 'tied', 'diag', 'spherical']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The type of precision matrices." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Cholesky decomposition of the precisions.\n\nParameters\n----------\ncovariances : array-like\n The covariance matrix of the current components.\n The shape depends of the covariance_type.\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}\n The type of precision matrices.\n\nReturns\n-------\nprecisions_cholesky : array-like\n The cholesky decomposition of sample precisions of the current\n components. The shape depends of the covariance_type." - }, - { - "name": "_compute_log_det_cholesky", - "decorators": [], - "parameters": [ - { - "name": "matrix_chol", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Cholesky decompositions of the matrices. 'full' : shape of (n_components, n_features, n_features) 'tied' : shape of (n_features, n_features) 'diag' : shape of (n_components, n_features) 'spherical' : shape of (n_components,)" - }, - { - "name": "covariance_type", - "type": "Literal['full', 'tied', 'diag', 'spherical']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "n_features", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the log-det of the cholesky decomposition of matrices.\n\nParameters\n----------\nmatrix_chol : array-like\n Cholesky decompositions of the matrices.\n 'full' : shape of (n_components, n_features, n_features)\n 'tied' : shape of (n_features, n_features)\n 'diag' : shape of (n_components, n_features)\n 'spherical' : shape of (n_components,)\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}\n\nn_features : int\n Number of features.\n\nReturns\n-------\nlog_det_precision_chol : array-like of shape (n_components,)\n The determinant of the precision matrix for each component." - }, - { - "name": "_estimate_log_gaussian_prob", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "means", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "precisions_chol", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Cholesky decompositions of the precision matrices. 
'full' : shape of (n_components, n_features, n_features) 'tied' : shape of (n_features, n_features) 'diag' : shape of (n_components, n_features) 'spherical' : shape of (n_components,)" - }, - { - "name": "covariance_type", - "type": "Literal['full', 'tied', 'diag', 'spherical']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the log Gaussian probability.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nmeans : array-like of shape (n_components, n_features)\n\nprecisions_chol : array-like\n Cholesky decompositions of the precision matrices.\n 'full' : shape of (n_components, n_features, n_features)\n 'tied' : shape of (n_features, n_features)\n 'diag' : shape of (n_components, n_features)\n 'spherical' : shape of (n_components,)\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}\n\nReturns\n-------\nlog_prob : array, shape (n_samples, n_components)" - } - ] - }, - { - "name": "sklearn.mixture", - "imports": [ - "from _gaussian_mixture import GaussianMixture", - "from _bayesian_mixture import BayesianGaussianMixture" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.mixture.tests.test_bayesian_mixture", - "imports": [ - "import copy", - "import numpy as np", - "from scipy.special import gammaln", - "import pytest", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.metrics.cluster import adjusted_rand_score", - "from sklearn.mixture._bayesian_mixture import _log_dirichlet_norm", - "from sklearn.mixture._bayesian_mixture import _log_wishart_norm", - "from sklearn.mixture import BayesianGaussianMixture", - "from sklearn.mixture.tests.test_gaussian_mixture import RandomData", - "from sklearn.exceptions import ConvergenceWarning", - 
"from sklearn.exceptions import NotFittedError", - "from sklearn.utils._testing import ignore_warnings" - ], - "classes": [], - "functions": [ - { - "name": "test_log_dirichlet_norm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_log_wishart_norm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_covariance_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_weight_concentration_prior_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_weights_prior_initialisation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_mean_prior_initialisation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_precisions_prior_initialisation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_check_is_fitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_monotonic_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compare_covar_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "test_check_covariance_precision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invariant_translation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_fit_predict_n_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_predict_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.mixture.tests.test_gaussian_mixture", - "imports": [ - "import sys", - "import copy", - "import warnings", - "import pytest", - "import numpy as np", - "from scipy import stats", - "from scipy import linalg", - "from sklearn.covariance import EmpiricalCovariance", - "from sklearn.datasets import make_spd_matrix", - "from io import StringIO", - "from sklearn.metrics.cluster import adjusted_rand_score", - "from sklearn.mixture import GaussianMixture", - "from sklearn.mixture._gaussian_mixture import _estimate_gaussian_covariances_full", - "from sklearn.mixture._gaussian_mixture import _estimate_gaussian_covariances_tied", - "from sklearn.mixture._gaussian_mixture import _estimate_gaussian_covariances_diag", - "from sklearn.mixture._gaussian_mixture import _estimate_gaussian_covariances_spherical", - "from sklearn.mixture._gaussian_mixture import _compute_precision_cholesky", - "from sklearn.mixture._gaussian_mixture import _compute_log_det_cholesky", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.exceptions import NotFittedError", - "from sklearn.utils.extmath import fast_logdet", - "from 
sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.mixture._base import _check_X", - "from sklearn.mixture._gaussian_mixture import _estimate_log_gaussian_prob" - ], - "classes": [ - { - "name": "RandomData", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "generate_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_means", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_precisions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_suffstat_sk_full", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_suffstat_sk_tied", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_suffstat_sk_diag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_suffstat_sk_spherical", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_log_det_cholesky", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_naive_lmvnpdf_diag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_log_probabilities", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_estimate_log_prob_resp", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_predict_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_fit_predict_n_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_fit_best_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_fit_convergence_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_multiple_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_n_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bic_1d_1component", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_aic_bic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_convergence_detected_with_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_monotonic_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regularisation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_property", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.mixture.tests.test_mixture", - "imports": [ - "import pytest", - "import numpy as np", - "from sklearn.mixture import GaussianMixture", - "from sklearn.mixture import BayesianGaussianMixture" - ], - "classes": [], - "functions": [ - { - "name": "test_gaussian_mixture_n_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.mixture.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.model_selection._search", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "from collections import defaultdict", - "from collections.abc import Mapping", - "from collections.abc import Sequence", - "from collections.abc import Iterable", - "from functools import partial", - "from functools import reduce", - "from itertools import product", - "import numbers", - "import operator", - "import time", - "import warnings", - "import numpy as np", - "from numpy.ma import MaskedArray", - "from scipy.stats import rankdata", - "from base import BaseEstimator", - "from base import is_classifier", - "from base import clone", - "from base import MetaEstimatorMixin", - "from _split import check_cv", - "from _validation import _fit_and_score", - "from _validation import _aggregate_score_dicts", - "from _validation import _insert_error_scores", - "from _validation import _normalize_score_results", - "from exceptions import NotFittedError", - "from joblib import Parallel", - "from utils import check_random_state", - "from utils.random import sample_without_replacement", - "from utils._tags import _safe_tags", - "from utils.validation import indexable", - "from utils.validation import check_is_fitted", - "from utils.validation 
import _check_fit_params", - "from utils.validation import _deprecate_positional_args", - "from utils.metaestimators import if_delegate_has_method", - "from utils.fixes import delayed", - "from metrics._scorer import _check_multimetric_scoring", - "from metrics import check_scoring", - "from utils import deprecated" - ], - "classes": [ - { - "name": "ParameterGrid", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "param_grid", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The parameter grid to explore, as a dictionary mapping estimator parameters to sequences of allowed values. An empty dict signifies default parameters. A sequence of dicts signifies a sequence of grids to search, and is useful to avoid exploring parameter combinations that make no sense or have no effect. See the examples below." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__iter__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Iterate over the points in the grid.\n\nReturns\n-------\nparams : iterator over dict of str to any\n Yields dictionaries mapping each estimator parameter to one of its\n allowed values." - }, - { - "name": "__len__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Number of points on the grid." 
- }, - { - "name": "__getitem__", - "decorators": [], - "parameters": [ - { - "name": "ind", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The iteration index" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get the parameters that would be ``ind``th in iteration\n\nParameters\n----------\nind : int\n The iteration index\n\nReturns\n-------\nparams : dict of str to any\n Equal to list(self)[ind]" - } - ], - "docstring": "Grid of parameters with a discrete number of values for each.\n\nCan be used to iterate over parameter value combinations with the\nPython built-in function iter.\nThe order of the generated parameter combinations is deterministic.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nparam_grid : dict of str to sequence, or sequence of such\n The parameter grid to explore, as a dictionary mapping estimator\n parameters to sequences of allowed values.\n\n An empty dict signifies default parameters.\n\n A sequence of dicts signifies a sequence of grids to search, and is\n useful to avoid exploring parameter combinations that make no sense\n or have no effect. See the examples below.\n\nExamples\n--------\n>>> from sklearn.model_selection import ParameterGrid\n>>> param_grid = {'a': [1, 2], 'b': [True, False]}\n>>> list(ParameterGrid(param_grid)) == (\n... [{'a': 1, 'b': True}, {'a': 1, 'b': False},\n... {'a': 2, 'b': True}, {'a': 2, 'b': False}])\nTrue\n\n>>> grid = [{'kernel': ['linear']}, {'kernel': ['rbf'], 'gamma': [1, 10]}]\n>>> list(ParameterGrid(grid)) == [{'kernel': 'linear'},\n... {'kernel': 'rbf', 'gamma': 1},\n... {'kernel': 'rbf', 'gamma': 10}]\nTrue\n>>> ParameterGrid(grid)[1] == {'kernel': 'rbf', 'gamma': 1}\nTrue\n\nSee Also\n--------\nGridSearchCV : Uses :class:`ParameterGrid` to perform a full parallelized\n parameter search." 
- }, - { - "name": "ParameterSampler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "param_distributions", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary with parameters names (`str`) as keys and distributions or lists of parameters to try. Distributions must provide a ``rvs`` method for sampling (such as those from scipy.stats.distributions). If a list is given, it is sampled uniformly. If a list of dicts is given, first a dict is sampled uniformly, and then a parameter is sampled using that dict as above." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parameter settings that are produced." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo random number generator state used for random uniform sampling from lists of possible values instead of scipy.stats distributions. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_all_lists", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__iter__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__len__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Number of points that will be sampled." - } - ], - "docstring": "Generator on parameters sampled from given distributions.\n\nNon-deterministic iterable over random candidate combinations for hyper-\nparameter search. 
If all parameters are presented as a list,\nsampling without replacement is performed. If at least one parameter\nis given as a distribution, sampling with replacement is used.\nIt is highly recommended to use continuous distributions for continuous\nparameters.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nparam_distributions : dict\n Dictionary with parameters names (`str`) as keys and distributions\n or lists of parameters to try. Distributions must provide a ``rvs``\n method for sampling (such as those from scipy.stats.distributions).\n If a list is given, it is sampled uniformly.\n If a list of dicts is given, first a dict is sampled uniformly, and\n then a parameter is sampled using that dict as above.\n\nn_iter : int\n Number of parameter settings that are produced.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for random uniform sampling\n from lists of possible values instead of scipy.stats distributions.\n Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nparams : dict of str to any\n **Yields** dictionaries mapping each estimator parameter to\n as sampled value.\n\nExamples\n--------\n>>> from sklearn.model_selection import ParameterSampler\n>>> from scipy.stats.distributions import expon\n>>> import numpy as np\n>>> rng = np.random.RandomState(0)\n>>> param_grid = {'a':[1, 2], 'b': expon()}\n>>> param_list = list(ParameterSampler(param_grid, n_iter=4,\n... random_state=rng))\n>>> rounded_list = [dict((k, round(v, 6)) for (k, v) in d.items())\n... for d in param_list]\n>>> rounded_list == [{'b': 0.89856, 'a': 1},\n... {'b': 0.923223, 'a': 1},\n... {'b': 1.878964, 'a': 2},\n... 
{'b': 1.038159, 'a': 2}]\nTrue" - }, - { - "name": "BaseSearchCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_estimator_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target relative to X for classification or regression; None for unsupervised learning." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the score on the given data, if the estimator has been refit.\n\nThis uses the score defined by ``scoring`` where provided, and the\n``best_estimator_.score`` method otherwise.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples, n_output) or (n_samples,), default=None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\nReturns\n-------\nscore : float" - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to predict on. Must fulfill input requirements of the underlying estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call score_samples on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``score_samples``.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements\n of the underlying estimator.\n\nReturns\n-------\ny_score : ndarray of shape (n_samples,)" - }, - { - "name": "_check_is_fitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Must fulfill the input assumptions of the underlying estimator." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call predict on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``predict``.\n\nParameters\n----------\nX : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Must fulfill the input assumptions of the underlying estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call predict_proba on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``predict_proba``.\n\nParameters\n----------\nX : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Must fulfill the input assumptions of the underlying estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call predict_log_proba on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``predict_log_proba``.\n\nParameters\n----------\nX : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Must fulfill the input assumptions of the underlying estimator." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call decision_function on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``decision_function``.\n\nParameters\n----------\nX : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Must fulfill the input assumptions of the underlying estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call transform on the estimator with the best found parameters.\n\nOnly available if the underlying estimator supports ``transform`` and\n``refit=True``.\n\nParameters\n----------\nX : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "Xt", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Must fulfill the input assumptions of the underlying estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call inverse_transform on the estimator with the best found params.\n\nOnly available if the underlying estimator implements\n``inverse_transform`` and ``refit=True``.\n\nParameters\n----------\nXt : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator." 
- }, - { - "name": "n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_run_search", - "decorators": [], - "parameters": [ - { - "name": "evaluate_candidates", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This callback accepts: - a list of candidates, where each candidate is a dict of parameter settings. - an optional `cv` parameter which can be used to e.g. evaluate candidates on different dataset splits, or evaluate candidates on subsampled data (as done in the SucessiveHaling estimators). By default, the original `cv` parameter is used, and it is available as a private `_checked_cv_orig` attribute. - an optional `more_results` dict. Each key will be added to the `cv_results_` attribute. Values should be lists of length `n_candidates` It returns a dict of all results so far, formatted like ``cv_results_``. Important note (relevant whether the default cv is used or not): in randomized splitters, and unless the random_state parameter of cv was set to an int, calling cv.split() multiple times will yield different splits. Since cv.split() is called in evaluate_candidates, this means that candidates will be evaluated on different splits each time evaluate_candidates is called. This might be a methodological issue depending on the search strategy that you're implementing. 
To prevent randomized splitters from being used, you may use _split._yields_constant_splits()" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Repeatedly calls `evaluate_candidates` to conduct a search.\n\nThis method, implemented in sub-classes, makes it possible to\ncustomize the the scheduling of evaluations: GridSearchCV and\nRandomizedSearchCV schedule evaluations for their whole parameter\nsearch space at once but other more sequential approaches are also\npossible: for instance is possible to iteratively schedule evaluations\nfor new regions of the parameter search space based on previously\ncollected evaluation results. This makes it possible to implement\nBayesian optimization or more generally sequential model-based\noptimization by deriving from the BaseSearchCV abstract base class.\nFor example, Successive Halving is implemented by calling\n`evaluate_candidates` multiples times (once per iteration of the SH\nprocess), each time passing a different set of candidates with `X`\nand `y` of increasing sizes.\n\nParameters\n----------\nevaluate_candidates : callable\n This callback accepts:\n - a list of candidates, where each candidate is a dict of\n parameter settings.\n - an optional `cv` parameter which can be used to e.g.\n evaluate candidates on different dataset splits, or\n evaluate candidates on subsampled data (as done in the\n SucessiveHaling estimators). By default, the original `cv`\n parameter is used, and it is available as a private\n `_checked_cv_orig` attribute.\n - an optional `more_results` dict. Each key will be added to\n the `cv_results_` attribute. Values should be lists of\n length `n_candidates`\n\n It returns a dict of all results so far, formatted like\n ``cv_results_``.\n\n Important note (relevant whether the default cv is used or not):\n in randomized splitters, and unless the random_state parameter of\n cv was set to an int, calling cv.split() multiple times will\n yield different splits. 
Since cv.split() is called in\n evaluate_candidates, this means that candidates will be evaluated\n on different splits each time evaluate_candidates is called. This\n might be a methodological issue depending on the search strategy\n that you're implementing. To prevent randomized splitters from\n being used, you may use _split._yields_constant_splits()\n\nExamples\n--------\n\n::\n\n def _run_search(self, evaluate_candidates):\n 'Try C=0.1 only if C=1 is better than C=10'\n all_results = evaluate_candidates([{'C': 1}, {'C': 10}])\n score = all_results['mean_test_score']\n if score[0] < score[1]:\n evaluate_candidates([{'C': 0.1}])" - }, - { - "name": "_check_refit_for_multimetric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check `refit` is compatible with `scores` is valid" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target relative to X for classification or regression; None for unsupervised learning." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a \"Group\" :term:`cv` instance (e.g., :class:`~sklearn.model_selection.GroupKFold`)." 
- }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``fit`` method of the estimator" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Run fit with all sets of parameters.\n\nParameters\n----------\n\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples, n_output) or (n_samples,), default=None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n**fit_params : dict of str -> object\n Parameters passed to the ``fit`` method of the estimator" - }, - { - "name": "_format_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Abstract base class for hyper parameter search with cross-validation.\n " - }, - { - "name": "GridSearchCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed." 
- }, - { - "name": "param_grid", - "type": "Union[Dict, List]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary with parameters names (`str`) as keys and lists of parameter settings to try as values, or a list of such dictionaries, in which case the grids spanned by each dictionary in the list are explored. This enables searching over any sequence of parameter settings." - }, - { - "name": "scoring", - "type": "Union[Callable, str, Dict]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A single str (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. For evaluating multiple metrics, either give a list of (unique) strings or a dict with names as keys and callables as values. NOTE that when using custom scorers, each scorer should return a single value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. See :ref:`multimetric_grid_search` for an example. If None, the estimator's score method is used." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionchanged:: v0.20 `n_jobs` default changed from 1 to None" - }, - { - "name": "pre_dispatch", - "type": "int", - "hasDefault": true, - "default": "n_jobs", - "limitation": null, - "ignored": false, - "docstring": "Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. 
This parameter can be: - None, in which case all the jobs are immediately created and spawned. Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A str, giving an expression as a function of n_jobs, as in '2*n_jobs'" - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "refit", - "type": "Union[str, bool]", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Refit an estimator using the best found parameters on the whole dataset. For multiple metric evaluation, this needs to be a `str` denoting the scorer that would be used to find the best parameters for refitting the estimator at the end. Where there are considerations other than maximum score in choosing a best estimator, ``refit`` can be set to a function which returns the selected ``best_index_`` given ``cv_results_``. In that case, the ``best_estimator_`` and ``best_params_`` will be set according to the returned ``best_index_`` while the ``best_score_`` attribute will not be available. 
The refitted estimator is made available at the ``best_estimator_`` attribute and permits using ``predict`` directly on this ``GridSearchCV`` instance. Also for multiple metric evaluation, the attributes ``best_index_``, ``best_score_`` and ``best_params_`` will only be available if ``refit`` is set and all of them will be determined w.r.t this specific scorer. See ``scoring`` parameter to know more about multiple metric evaluation. .. versionchanged:: 0.20 Support for callable added." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity: the higher, the more messages. - >1 : the computation time for each fold and parameter candidate is displayed; - >2 : the score is also displayed; - >3 : the fold and candidate parameter indexes are also displayed together with the starting time of the computation." - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error." - }, - { - "name": "return_train_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, the ``cv_results_`` attribute will not include training scores. Computing training scores is used to get insights on how different parameter settings impact the overfitting/underfitting trade-off. However computing the scores on the training set can be computationally expensive and is not strictly required to select the parameters that yield the best generalization performance. .. versionadded:: 0.19 .. 
versionchanged:: 0.21 Default value was changed from ``True`` to ``False``" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_run_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Search all candidates in param_grid" - } - ], - "docstring": "Exhaustive search over specified parameter values for an estimator.\n\nImportant members are fit, predict.\n\nGridSearchCV implements a \"fit\" and a \"score\" method.\nIt also implements \"score_samples\", \"predict\", \"predict_proba\",\n\"decision_function\", \"transform\" and \"inverse_transform\" if they are\nimplemented in the estimator used.\n\nThe parameters of the estimator used to apply these methods are optimized\nby cross-validated grid-search over a parameter grid.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\nparam_grid : dict or list of dictionaries\n Dictionary with parameters names (`str`) as keys and lists of\n parameter settings to try as values, or a list of such\n dictionaries, in which case the grids spanned by each dictionary\n in the list are explored. This enables searching over any sequence\n of parameter settings.\n\nscoring : str, callable, list/tuple or dict, default=None\n A single str (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n For evaluating multiple metrics, either give a list of (unique) strings\n or a dict with names as keys and callables as values.\n\n NOTE that when using custom scorers, each scorer should return a single\n value. 
Metric functions returning a list/array of values can be wrapped\n into multiple scorers that return one value each.\n\n See :ref:`multimetric_grid_search` for an example.\n\n If None, the estimator's score method is used.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\npre_dispatch : int, or str, default=n_jobs\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. 
versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nrefit : bool, str, or callable, default=True\n Refit an estimator using the best found parameters on the whole\n dataset.\n\n For multiple metric evaluation, this needs to be a `str` denoting the\n scorer that would be used to find the best parameters for refitting\n the estimator at the end.\n\n Where there are considerations other than maximum score in\n choosing a best estimator, ``refit`` can be set to a function which\n returns the selected ``best_index_`` given ``cv_results_``. In that\n case, the ``best_estimator_`` and ``best_params_`` will be set\n according to the returned ``best_index_`` while the ``best_score_``\n attribute will not be available.\n\n The refitted estimator is made available at the ``best_estimator_``\n attribute and permits using ``predict`` directly on this\n ``GridSearchCV`` instance.\n\n Also for multiple metric evaluation, the attributes ``best_index_``,\n ``best_score_`` and ``best_params_`` will only be available if\n ``refit`` is set and all of them will be determined w.r.t this specific\n scorer.\n\n See ``scoring`` parameter to know more about multiple metric\n evaluation.\n\n .. versionchanged:: 0.20\n Support for callable added.\n\nverbose : int\n Controls the verbosity: the higher, the more messages.\n\n - >1 : the computation time for each fold and parameter candidate is\n displayed;\n - >2 : the score is also displayed;\n - >3 : the fold and candidate parameter indexes are also displayed\n together with the starting time of the computation.\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. 
This parameter does not affect the refit\n step, which will always raise the error.\n\nreturn_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\n .. versionadded:: 0.19\n\n .. versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``\n\n\nExamples\n--------\n>>> from sklearn import svm, datasets\n>>> from sklearn.model_selection import GridSearchCV\n>>> iris = datasets.load_iris()\n>>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}\n>>> svc = svm.SVC()\n>>> clf = GridSearchCV(svc, parameters)\n>>> clf.fit(iris.data, iris.target)\nGridSearchCV(estimator=SVC(),\n param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')})\n>>> sorted(clf.cv_results_.keys())\n['mean_fit_time', 'mean_score_time', 'mean_test_score',...\n 'param_C', 'param_kernel', 'params',...\n 'rank_test_score', 'split0_test_score',...\n 'split2_test_score', ...\n 'std_fit_time', 'std_score_time', 'std_test_score']\n\nAttributes\n----------\ncv_results_ : dict of numpy (masked) ndarrays\n A dict with keys as column headers and values as columns, that can be\n imported into a pandas ``DataFrame``.\n\n For instance the below given table\n\n +------------+-----------+------------+-----------------+---+---------+\n |param_kernel|param_gamma|param_degree|split0_test_score|...|rank_t...|\n +============+===========+============+=================+===+=========+\n | 'poly' | -- | 2 | 0.80 |...| 2 |\n +------------+-----------+------------+-----------------+---+---------+\n | 'poly' | -- | 3 | 0.70 |...| 4 |\n +------------+-----------+------------+-----------------+---+---------+\n | 'rbf' | 0.1 | 
-- | 0.80 |...| 3 |\n +------------+-----------+------------+-----------------+---+---------+\n | 'rbf' | 0.2 | -- | 0.93 |...| 1 |\n +------------+-----------+------------+-----------------+---+---------+\n\n will be represented by a ``cv_results_`` dict of::\n\n {\n 'param_kernel': masked_array(data = ['poly', 'poly', 'rbf', 'rbf'],\n mask = [False False False False]...)\n 'param_gamma': masked_array(data = [-- -- 0.1 0.2],\n mask = [ True True False False]...),\n 'param_degree': masked_array(data = [2.0 3.0 -- --],\n mask = [False False True True]...),\n 'split0_test_score' : [0.80, 0.70, 0.80, 0.93],\n 'split1_test_score' : [0.82, 0.50, 0.70, 0.78],\n 'mean_test_score' : [0.81, 0.60, 0.75, 0.85],\n 'std_test_score' : [0.01, 0.10, 0.05, 0.08],\n 'rank_test_score' : [2, 4, 3, 1],\n 'split0_train_score' : [0.80, 0.92, 0.70, 0.93],\n 'split1_train_score' : [0.82, 0.55, 0.70, 0.87],\n 'mean_train_score' : [0.81, 0.74, 0.70, 0.90],\n 'std_train_score' : [0.01, 0.19, 0.00, 0.03],\n 'mean_fit_time' : [0.73, 0.63, 0.43, 0.49],\n 'std_fit_time' : [0.01, 0.02, 0.01, 0.01],\n 'mean_score_time' : [0.01, 0.06, 0.04, 0.04],\n 'std_score_time' : [0.00, 0.00, 0.00, 0.01],\n 'params' : [{'kernel': 'poly', 'degree': 2}, ...],\n }\n\n NOTE\n\n The key ``'params'`` is used to store a list of parameter\n settings dicts for all the parameter candidates.\n\n The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and\n ``std_score_time`` are all in seconds.\n\n For multi-metric evaluation, the scores for all the scorers are\n available in the ``cv_results_`` dict at the keys ending with that\n scorer's name (``'_'``) instead of ``'_score'`` shown\n above. ('split0_test_precision', 'mean_train_precision' etc.)\n\nbest_estimator_ : estimator\n Estimator that was chosen by the search, i.e. estimator\n which gave highest score (or smallest loss if specified)\n on the left out data. 
Not available if ``refit=False``.\n\n See ``refit`` parameter for more information on allowed values.\n\nbest_score_ : float\n Mean cross-validated score of the best_estimator\n\n For multi-metric evaluation, this is present only if ``refit`` is\n specified.\n\n This attribute is not available if ``refit`` is a function.\n\nbest_params_ : dict\n Parameter setting that gave the best results on the hold out data.\n\n For multi-metric evaluation, this is present only if ``refit`` is\n specified.\n\nbest_index_ : int\n The index (of the ``cv_results_`` arrays) which corresponds to the best\n candidate parameter setting.\n\n The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n the parameter setting for the best model, that gives the highest\n mean score (``search.best_score_``).\n\n For multi-metric evaluation, this is present only if ``refit`` is\n specified.\n\nscorer_ : function or a dict\n Scorer function used on the held out data to choose the best\n parameters for the model.\n\n For multi-metric evaluation, this attribute holds the validated\n ``scoring`` dict which maps the scorer key to the scorer callable.\n\nn_splits_ : int\n The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n Seconds used for refitting the best model on the whole dataset.\n\n This is present only if ``refit`` is not False.\n\n .. versionadded:: 0.20\n\nmultimetric_ : bool\n Whether or not the scorers compute several metrics.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the left out\ndata, unless an explicit score is passed in which case it is used instead.\n\nIf `n_jobs` was set to a value higher than one, the data is copied for each\npoint in the grid (and not `n_jobs` times). This is done for efficiency\nreasons if individual jobs take very little time, but may raise errors if\nthe dataset is large and not enough memory is available. A workaround in\nthis case is to set `pre_dispatch`. 
Then, the memory is copied only\n`pre_dispatch` many times. A reasonable value for `pre_dispatch` is `2 *\nn_jobs`.\n\nSee Also\n---------\nParameterGrid : Generates all the combinations of a hyperparameter grid.\ntrain_test_split : Utility function to split the data into a development\n set usable for fitting a GridSearchCV instance and an evaluation set\n for its final evaluation.\nsklearn.metrics.make_scorer : Make a scorer from a performance metric or\n loss function." - }, - { - "name": "RandomizedSearchCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A object of that type is instantiated for each grid point. This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed." - }, - { - "name": "param_distributions", - "type": "Union[Dict, List]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary with parameters names (`str`) as keys and distributions or lists of parameters to try. Distributions must provide a ``rvs`` method for sampling (such as those from scipy.stats.distributions). If a list is given, it is sampled uniformly. If a list of dicts is given, first a dict is sampled uniformly, and then a parameter is sampled using that dict as above." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of parameter settings that are sampled. n_iter trades off runtime vs quality of the solution." 
- }, - { - "name": "scoring", - "type": "Union[Callable, str, Dict]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A single str (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. For evaluating multiple metrics, either give a list of (unique) strings or a dict with names as keys and callables as values. NOTE that when using custom scorers, each scorer should return a single value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. See :ref:`multimetric_grid_search` for an example. If None, the estimator's score method is used." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionchanged:: v0.20 `n_jobs` default changed from 1 to None" - }, - { - "name": "pre_dispatch", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. This parameter can be: - None, in which case all the jobs are immediately created and spawned. Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A str, giving an expression as a function of n_jobs, as in '2*n_jobs'" - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. 
Possible inputs for cv are: - None, to use the default 5-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "refit", - "type": "Union[str, bool]", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Refit an estimator using the best found parameters on the whole dataset. For multiple metric evaluation, this needs to be a `str` denoting the scorer that would be used to find the best parameters for refitting the estimator at the end. Where there are considerations other than maximum score in choosing a best estimator, ``refit`` can be set to a function which returns the selected ``best_index_`` given the ``cv_results``. In that case, the ``best_estimator_`` and ``best_params_`` will be set according to the returned ``best_index_`` while the ``best_score_`` attribute will not be available. The refitted estimator is made available at the ``best_estimator_`` attribute and permits using ``predict`` directly on this ``RandomizedSearchCV`` instance. Also for multiple metric evaluation, the attributes ``best_index_``, ``best_score_`` and ``best_params_`` will only be available if ``refit`` is set and all of them will be determined w.r.t this specific scorer. See ``scoring`` parameter to know more about multiple metric evaluation. .. versionchanged:: 0.20 Support for callable added." 
- }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity: the higher, the more messages." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo random number generator state used for random uniform sampling from lists of possible values instead of scipy.stats distributions. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error." - }, - { - "name": "return_train_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, the ``cv_results_`` attribute will not include training scores. Computing training scores is used to get insights on how different parameter settings impact the overfitting/underfitting trade-off. However computing the scores on the training set can be computationally expensive and is not strictly required to select the parameters that yield the best generalization performance. .. versionadded:: 0.19 .. 
versionchanged:: 0.21 Default value was changed from ``True`` to ``False``" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_run_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Search n_iter candidates from param_distributions" - } - ], - "docstring": "Randomized search on hyper parameters.\n\nRandomizedSearchCV implements a \"fit\" and a \"score\" method.\nIt also implements \"score_samples\", \"predict\", \"predict_proba\",\n\"decision_function\", \"transform\" and \"inverse_transform\" if they are\nimplemented in the estimator used.\n\nThe parameters of the estimator used to apply these methods are optimized\nby cross-validated search over parameter settings.\n\nIn contrast to GridSearchCV, not all parameter values are tried out, but\nrather a fixed number of parameter settings is sampled from the specified\ndistributions. The number of parameter settings that are tried is\ngiven by n_iter.\n\nIf all parameters are presented as a list,\nsampling without replacement is performed. If at least one parameter\nis given as a distribution, sampling with replacement is used.\nIt is highly recommended to use continuous distributions for continuous\nparameters.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.14\n\nParameters\n----------\nestimator : estimator object.\n A object of that type is instantiated for each grid point.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\nparam_distributions : dict or list of dicts\n Dictionary with parameters names (`str`) as keys and distributions\n or lists of parameters to try. 
Distributions must provide a ``rvs``\n method for sampling (such as those from scipy.stats.distributions).\n If a list is given, it is sampled uniformly.\n If a list of dicts is given, first a dict is sampled uniformly, and\n then a parameter is sampled using that dict as above.\n\nn_iter : int, default=10\n Number of parameter settings that are sampled. n_iter trades\n off runtime vs quality of the solution.\n\nscoring : str, callable, list/tuple or dict, default=None\n A single str (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n For evaluating multiple metrics, either give a list of (unique) strings\n or a dict with names as keys and callables as values.\n\n NOTE that when using custom scorers, each scorer should return a single\n value. Metric functions returning a list/array of values can be wrapped\n into multiple scorers that return one value each.\n\n See :ref:`multimetric_grid_search` for an example.\n\n If None, the estimator's score method is used.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\npre_dispatch : int, or str, default=None\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. 
Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nrefit : bool, str, or callable, default=True\n Refit an estimator using the best found parameters on the whole\n dataset.\n\n For multiple metric evaluation, this needs to be a `str` denoting the\n scorer that would be used to find the best parameters for refitting\n the estimator at the end.\n\n Where there are considerations other than maximum score in\n choosing a best estimator, ``refit`` can be set to a function which\n returns the selected ``best_index_`` given the ``cv_results``. 
In that\n case, the ``best_estimator_`` and ``best_params_`` will be set\n according to the returned ``best_index_`` while the ``best_score_``\n attribute will not be available.\n\n The refitted estimator is made available at the ``best_estimator_``\n attribute and permits using ``predict`` directly on this\n ``RandomizedSearchCV`` instance.\n\n Also for multiple metric evaluation, the attributes ``best_index_``,\n ``best_score_`` and ``best_params_`` will only be available if\n ``refit`` is set and all of them will be determined w.r.t this specific\n scorer.\n\n See ``scoring`` parameter to know more about multiple metric\n evaluation.\n\n .. versionchanged:: 0.20\n Support for callable added.\n\nverbose : int\n Controls the verbosity: the higher, the more messages.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for random uniform sampling\n from lists of possible values instead of scipy.stats distributions.\n Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error.\n\nreturn_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\n .. versionadded:: 0.19\n\n .. 
versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``\n\nAttributes\n----------\ncv_results_ : dict of numpy (masked) ndarrays\n A dict with keys as column headers and values as columns, that can be\n imported into a pandas ``DataFrame``.\n\n For instance the below given table\n\n +--------------+-------------+-------------------+---+---------------+\n | param_kernel | param_gamma | split0_test_score |...|rank_test_score|\n +==============+=============+===================+===+===============+\n | 'rbf' | 0.1 | 0.80 |...| 1 |\n +--------------+-------------+-------------------+---+---------------+\n | 'rbf' | 0.2 | 0.84 |...| 3 |\n +--------------+-------------+-------------------+---+---------------+\n | 'rbf' | 0.3 | 0.70 |...| 2 |\n +--------------+-------------+-------------------+---+---------------+\n\n will be represented by a ``cv_results_`` dict of::\n\n {\n 'param_kernel' : masked_array(data = ['rbf', 'rbf', 'rbf'],\n mask = False),\n 'param_gamma' : masked_array(data = [0.1 0.2 0.3], mask = False),\n 'split0_test_score' : [0.80, 0.84, 0.70],\n 'split1_test_score' : [0.82, 0.50, 0.70],\n 'mean_test_score' : [0.81, 0.67, 0.70],\n 'std_test_score' : [0.01, 0.24, 0.00],\n 'rank_test_score' : [1, 3, 2],\n 'split0_train_score' : [0.80, 0.92, 0.70],\n 'split1_train_score' : [0.82, 0.55, 0.70],\n 'mean_train_score' : [0.81, 0.74, 0.70],\n 'std_train_score' : [0.01, 0.19, 0.00],\n 'mean_fit_time' : [0.73, 0.63, 0.43],\n 'std_fit_time' : [0.01, 0.02, 0.01],\n 'mean_score_time' : [0.01, 0.06, 0.04],\n 'std_score_time' : [0.00, 0.00, 0.00],\n 'params' : [{'kernel' : 'rbf', 'gamma' : 0.1}, ...],\n }\n\n NOTE\n\n The key ``'params'`` is used to store a list of parameter\n settings dicts for all the parameter candidates.\n\n The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and\n ``std_score_time`` are all in seconds.\n\n For multi-metric evaluation, the scores for all the scorers are\n available in the ``cv_results_`` dict at the keys 
ending with that\n scorer's name (``'_'``) instead of ``'_score'`` shown\n above. ('split0_test_precision', 'mean_train_precision' etc.)\n\nbest_estimator_ : estimator\n Estimator that was chosen by the search, i.e. estimator\n which gave highest score (or smallest loss if specified)\n on the left out data. Not available if ``refit=False``.\n\n For multi-metric evaluation, this attribute is present only if\n ``refit`` is specified.\n\n See ``refit`` parameter for more information on allowed values.\n\nbest_score_ : float\n Mean cross-validated score of the best_estimator.\n\n For multi-metric evaluation, this is not available if ``refit`` is\n ``False``. See ``refit`` parameter for more information.\n\n This attribute is not available if ``refit`` is a function.\n\nbest_params_ : dict\n Parameter setting that gave the best results on the hold out data.\n\n For multi-metric evaluation, this is not available if ``refit`` is\n ``False``. See ``refit`` parameter for more information.\n\nbest_index_ : int\n The index (of the ``cv_results_`` arrays) which corresponds to the best\n candidate parameter setting.\n\n The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n the parameter setting for the best model, that gives the highest\n mean score (``search.best_score_``).\n\n For multi-metric evaluation, this is not available if ``refit`` is\n ``False``. See ``refit`` parameter for more information.\n\nscorer_ : function or a dict\n Scorer function used on the held out data to choose the best\n parameters for the model.\n\n For multi-metric evaluation, this attribute holds the validated\n ``scoring`` dict which maps the scorer key to the scorer callable.\n\nn_splits_ : int\n The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n Seconds used for refitting the best model on the whole dataset.\n\n This is present only if ``refit`` is not False.\n\n .. 
versionadded:: 0.20\n\nmultimetric_ : bool\n Whether or not the scorers compute several metrics.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the held-out\ndata, according to the scoring parameter.\n\nIf `n_jobs` was set to a value higher than one, the data is copied for each\nparameter setting(and not `n_jobs` times). This is done for efficiency\nreasons if individual jobs take very little time, but may raise errors if\nthe dataset is large and not enough memory is available. A workaround in\nthis case is to set `pre_dispatch`. Then, the memory is copied only\n`pre_dispatch` many times. A reasonable value for `pre_dispatch` is `2 *\nn_jobs`.\n\nSee Also\n--------\nGridSearchCV : Does exhaustive search over a grid of parameters.\nParameterSampler : A generator over parameter settings, constructed from\n param_distributions.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.model_selection import RandomizedSearchCV\n>>> from scipy.stats import uniform\n>>> iris = load_iris()\n>>> logistic = LogisticRegression(solver='saga', tol=1e-2, max_iter=200,\n... random_state=0)\n>>> distributions = dict(C=uniform(loc=0, scale=4),\n... penalty=['l2', 'l1'])\n>>> clf = RandomizedSearchCV(logistic, distributions, random_state=0)\n>>> search = clf.fit(iris.data, iris.target)\n>>> search.best_params_\n{'C': 2..., 'penalty': 'l1'}" - } - ], - "functions": [ - { - "name": "fit_grid_point", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Union[List, ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "y", - "type": "Optional[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets for input data." 
- }, - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A object of that type is instantiated for each grid point. This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed." - }, - { - "name": "parameters", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to be set on estimator for this grid point." - }, - { - "name": "train", - "type": "Union[NDArray, bool]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Boolean mask or indices for training set." - }, - { - "name": "test", - "type": "Union[NDArray, bool]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Boolean mask or indices for test set." - }, - { - "name": "scorer", - "type": "Optional[Callable]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The scorer callable object / function must have its signature as ``scorer(estimator, X, y)``. If ``None`` the estimator's score method is used." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." - }, - { - "name": "**fit_params", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional parameter passed to the fit function of the estimator." - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. 
If a numeric value is given, FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Run fit on one set of parameters.\n\nParameters\n----------\nX : array-like, sparse matrix or list\n Input data.\n\ny : array-like or None\n Targets for input data.\n\nestimator : estimator object\n A object of that type is instantiated for each grid point.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\nparameters : dict\n Parameters to be set on estimator for this grid point.\n\ntrain : ndarray, dtype int or bool\n Boolean mask or indices for training set.\n\ntest : ndarray, dtype int or bool\n Boolean mask or indices for test set.\n\nscorer : callable or None\n The scorer callable object / function must have its signature as\n ``scorer(estimator, X, y)``.\n\n If ``None`` the estimator's score method is used.\n\nverbose : int\n Verbosity level.\n\n**fit_params : kwargs\n Additional parameter passed to the fit function of the estimator.\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error.\n\nReturns\n-------\nscore : float\n Score of this parameter setting on given test split.\n\nparameters : dict\n The parameters that have been evaluated.\n\nn_samples_test : int\n Number of test samples in this split." 
- }, - { - "name": "_check_param_grid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.model_selection._search_successive_halving", - "imports": [ - "from math import ceil", - "from math import floor", - "from math import log", - "from abc import abstractmethod", - "from numbers import Integral", - "import numpy as np", - "from _search import _check_param_grid", - "from _search import BaseSearchCV", - "from None import ParameterGrid", - "from None import ParameterSampler", - "from utils.validation import _num_samples", - "from base import is_classifier", - "from _split import check_cv", - "from _split import _yields_constant_splits", - "from utils import resample" - ], - "classes": [ - { - "name": "_SubsampleMetaSplitter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Splitter that subsamples a given fraction of the dataset" - }, - { - "name": "BaseSuccessiveHalving", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_input_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." 
- }, - { - "name": "y", - "type": "Optional[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target relative to X for classification or regression; None for unsupervised learning." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a \"Group\" :term:`cv` instance (e.g., :class:`~sklearn.model_selection.GroupKFold`)." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``fit`` method of the estimator" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Run fit with all sets of parameters.\n\nParameters\n----------\n\nX : array-like, shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like, shape (n_samples,) or (n_samples, n_output), optional\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. 
Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of the estimator" - }, - { - "name": "_run_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_generate_candidate_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Implements successive halving.\n\nRef:\nAlmost optimal exploration in multi-armed bandits, ICML 13\nZohar Karnin, Tomer Koren, Oren Somekh" - }, - { - "name": "HalvingGridSearchCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed." - }, - { - "name": "param_grid", - "type": "Union[Dict, List]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary with parameters names (string) as keys and lists of parameter settings to try as values, or a list of such dictionaries, in which case the grids spanned by each dictionary in the list are explored. This enables searching over any sequence of parameter settings." - }, - { - "name": "factor", - "type": "Union[float, int]", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The 'halving' parameter, which determines the proportion of candidates that are selected for each subsequent iteration. For example, ``factor=3`` means that only one third of the candidates are selected." 
- }, - { - "name": "resource", - "type": "str", - "hasDefault": true, - "default": "'n_samples'", - "limitation": null, - "ignored": false, - "docstring": "Defines the resource that increases with each iteration. By default, the resource is the number of samples. It can also be set to any parameter of the base estimator that accepts positive integer values, e.g. 'n_iterations' or 'n_estimators' for a gradient boosting estimator. In this case ``max_resources`` cannot be 'auto' and must be set explicitly." - }, - { - "name": "max_resources", - "type": "int", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The maximum amount of resource that any candidate is allowed to use for a given iteration. By default, this is set to ``n_samples`` when ``resource='n_samples'`` (default), else an error is raised." - }, - { - "name": "min_resources", - "type": "Literal['exhaust', 'smallest']", - "hasDefault": true, - "default": "'exhaust'", - "limitation": null, - "ignored": false, - "docstring": "The minimum amount of resource that any candidate is allowed to use for a given iteration. Equivalently, this defines the amount of resources `r0` that are allocated for each candidate at the first iteration. - 'smallest' is a heuristic that sets `r0` to a small value: - ``n_splits * 2`` when ``resource='n_samples'`` for a regression problem - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a classification problem - ``1`` when ``resource != 'n_samples'`` - 'exhaust' will set `r0` such that the **last** iteration uses as much resources as possible. Namely, the last iteration will use the highest value smaller than ``max_resources`` that is a multiple of both ``min_resources`` and ``factor``. In general, using 'exhaust' leads to a more accurate estimator, but is slightly more time consuming. Note that the amount of resources used at each iteration is always a multiple of ``min_resources``." 
- }, - { - "name": "aggressive_elimination", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This is only relevant in cases where there isn't enough resources to reduce the remaining candidates to at most `factor` after the last iteration. If ``True``, then the search process will 'replay' the first iteration for as long as needed until the number of candidates is small enough. This is ``False`` by default, which means that the last iteration may evaluate more than ``factor`` candidates. See :ref:`aggressive_elimination` for more details." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - integer, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. note:: Due to implementation details, the folds produced by `cv` must be the same across multiple calls to `cv.split()`. For built-in `scikit-learn` iterators, this can be achieved by deactivating shuffling (`shuffle=False`), or by setting the `cv`'s `random_state` parameter to an integer." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. If None, the estimator's score method is used." 
- }, - { - "name": "refit", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, refit an estimator using the best found parameters on the whole dataset. The refitted estimator is made available at the ``best_estimator_`` attribute and permits using ``predict`` directly on this ``HalvingGridSearchCV`` instance." - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error. Default is ``np.nan``" - }, - { - "name": "return_train_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, the ``cv_results_`` attribute will not include training scores. Computing training scores is used to get insights on how different parameter settings impact the overfitting/underfitting trade-off. However computing the scores on the training set can be computationally expensive and is not strictly required to select the parameters that yield the best generalization performance." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo random number generator state used for subsampling the dataset when `resources != 'n_samples'`. Ignored otherwise. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. 
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity: the higher, the more messages." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_generate_candidate_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Search over specified parameter values with successive halving.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using\nmore and more resources.\n\nRead more in the :ref:`User guide `.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingGridSearchCV\n\nParameters\n----------\nestimator : estimator object.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\nparam_grid : dict or list of dictionaries\n Dictionary with parameters names (string) as keys and lists of\n parameter settings to try as values, or a list of such\n dictionaries, in which case the grids spanned by each dictionary\n in the list are explored. 
This enables searching over any sequence\n of parameter settings.\n\nfactor : int or float, default=3\n The 'halving' parameter, which determines the proportion of candidates\n that are selected for each subsequent iteration. For example,\n ``factor=3`` means that only one third of the candidates are selected.\n\nresource : ``'n_samples'`` or str, default='n_samples'\n Defines the resource that increases with each iteration. By default,\n the resource is the number of samples. It can also be set to any\n parameter of the base estimator that accepts positive integer\n values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n boosting estimator. In this case ``max_resources`` cannot be 'auto'\n and must be set explicitly.\n\nmax_resources : int, default='auto'\n The maximum amount of resource that any candidate is allowed to use\n for a given iteration. By default, this is set to ``n_samples`` when\n ``resource='n_samples'`` (default), else an error is raised.\n\nmin_resources : {'exhaust', 'smallest'} or int, default='exhaust'\n The minimum amount of resource that any candidate is allowed to use\n for a given iteration. Equivalently, this defines the amount of\n resources `r0` that are allocated for each candidate at the first\n iteration.\n\n - 'smallest' is a heuristic that sets `r0` to a small value:\n - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n problem\n - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n classification problem\n - ``1`` when ``resource != 'n_samples'``\n - 'exhaust' will set `r0` such that the **last** iteration uses as\n much resources as possible. Namely, the last iteration will use the\n highest value smaller than ``max_resources`` that is a multiple of\n both ``min_resources`` and ``factor``. 
In general, using 'exhaust'\n leads to a more accurate estimator, but is slightly more time\n consuming.\n\n Note that the amount of resources used at each iteration is always a\n multiple of ``min_resources``.\n\naggressive_elimination : bool, default=False\n This is only relevant in cases where there isn't enough resources to\n reduce the remaining candidates to at most `factor` after the last\n iteration. If ``True``, then the search process will 'replay' the\n first iteration for as long as needed until the number of candidates\n is small enough. This is ``False`` by default, which means that the\n last iteration may evaluate more than ``factor`` candidates. See\n :ref:`aggressive_elimination` for more details.\n\ncv : int, cross-validation generator or iterable, default=5\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. note::\n Due to implementation details, the folds produced by `cv` must be\n the same across multiple calls to `cv.split()`. 
For\n built-in `scikit-learn` iterators, this can be achieved by\n deactivating shuffling (`shuffle=False`), or by setting the\n `cv`'s `random_state` parameter to an integer.\n\nscoring : string, callable, or None, default=None\n A single string (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n If None, the estimator's score method is used.\n\nrefit : bool, default=True\n If True, refit an estimator using the best found parameters on the\n whole dataset.\n\n The refitted estimator is made available at the ``best_estimator_``\n attribute and permits using ``predict`` directly on this\n ``HalvingGridSearchCV`` instance.\n\nerror_score : 'raise' or numeric\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error. Default is ``np.nan``\n\nreturn_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for subsampling the dataset\n when `resources != 'n_samples'`. Ignored otherwise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_jobs : int or None, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nverbose : int\n Controls the verbosity: the higher, the more messages.\n\nAttributes\n----------\nn_resources_ : list of int\n The amount of resources used at each iteration.\n\nn_candidates_ : list of int\n The number of candidate parameters that were evaluated at each\n iteration.\n\nn_remaining_candidates_ : int\n The number of candidate parameters that are left after the last\n iteration. It corresponds to `ceil(n_candidates[-1] / factor)`\n\nmax_resources_ : int\n The maximum number of resources that any candidate is allowed to use\n for a given iteration. Note that since the number of resources used\n at each iteration must be a multiple of ``min_resources_``, the\n actual number of resources used at the last iteration may be smaller\n than ``max_resources_``.\n\nmin_resources_ : int\n The amount of resources that are allocated for each candidate at the\n first iteration.\n\nn_iterations_ : int\n The actual number of iterations that were run. This is equal to\n ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n Else, this is equal to ``min(n_possible_iterations_,\n n_required_iterations_)``.\n\nn_possible_iterations_ : int\n The number of iterations that are possible starting with\n ``min_resources_`` resources and without exceeding\n ``max_resources_``.\n\nn_required_iterations_ : int\n The number of iterations that are required to end up with less than\n ``factor`` candidates at the last iteration, starting with\n ``min_resources_`` resources. This will be smaller than\n ``n_possible_iterations_`` when there isn't enough resources.\n\ncv_results_ : dict of numpy (masked) ndarrays\n A dict with keys as column headers and values as columns, that can be\n imported into a pandas ``DataFrame``. 
It contains many informations for\n analysing the results of a search.\n Please refer to the :ref:`User guide`\n for details.\n\nbest_estimator_ : estimator or dict\n Estimator that was chosen by the search, i.e. estimator\n which gave highest score (or smallest loss if specified)\n on the left out data. Not available if ``refit=False``.\n\nbest_score_ : float\n Mean cross-validated score of the best_estimator.\n\nbest_params_ : dict\n Parameter setting that gave the best results on the hold out data.\n\nbest_index_ : int\n The index (of the ``cv_results_`` arrays) which corresponds to the best\n candidate parameter setting.\n\n The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n the parameter setting for the best model, that gives the highest\n mean score (``search.best_score_``).\n\nscorer_ : function or a dict\n Scorer function used on the held out data to choose the best\n parameters for the model.\n\nn_splits_ : int\n The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n Seconds used for refitting the best model on the whole dataset.\n\n This is present only if ``refit`` is not False.\n\nSee Also\n--------\n:class:`HalvingRandomSearchCV`:\n Random search over a set of parameters using successive halving.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the held-out\ndata, according to the scoring parameter.\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.experimental import enable_halving_search_cv # noqa\n>>> from sklearn.model_selection import HalvingGridSearchCV\n...\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = RandomForestClassifier(random_state=0)\n...\n>>> param_grid = {\"max_depth\": [3, None],\n... \"min_samples_split\": [5, 10]}\n>>> search = HalvingGridSearchCV(clf, param_grid, resource='n_estimators',\n... max_resources=10,\n... 
random_state=0).fit(X, y)\n>>> search.best_params_ # doctest: +SKIP\n{'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}" - }, - { - "name": "HalvingRandomSearchCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed." - }, - { - "name": "param_distributions", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary with parameters names (string) as keys and distributions or lists of parameters to try. Distributions must provide a ``rvs`` method for sampling (such as those from scipy.stats.distributions). If a list is given, it is sampled uniformly." - }, - { - "name": "n_candidates", - "type": "int", - "hasDefault": true, - "default": "'exhaust'", - "limitation": null, - "ignored": false, - "docstring": "The number of candidate parameters to sample, at the first iteration. Using 'exhaust' will sample enough candidates so that the last iteration uses as many resources as possible, based on `min_resources`, `max_resources` and `factor`. In this case, `min_resources` cannot be 'exhaust'." - }, - { - "name": "factor", - "type": "Union[float, int]", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The 'halving' parameter, which determines the proportion of candidates that are selected for each subsequent iteration. For example, ``factor=3`` means that only one third of the candidates are selected." 
- }, - { - "name": "resource", - "type": "str", - "hasDefault": true, - "default": "'n_samples'", - "limitation": null, - "ignored": false, - "docstring": "Defines the resource that increases with each iteration. By default, the resource is the number of samples. It can also be set to any parameter of the base estimator that accepts positive integer values, e.g. 'n_iterations' or 'n_estimators' for a gradient boosting estimator. In this case ``max_resources`` cannot be 'auto' and must be set explicitly." - }, - { - "name": "max_resources", - "type": "int", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of resources that any candidate is allowed to use for a given iteration. By default, this is set ``n_samples`` when ``resource='n_samples'`` (default), else an error is raised." - }, - { - "name": "min_resources", - "type": "Literal['exhaust', 'smallest']", - "hasDefault": true, - "default": "'smallest'", - "limitation": null, - "ignored": false, - "docstring": "The minimum amount of resource that any candidate is allowed to use for a given iteration. Equivalently, this defines the amount of resources `r0` that are allocated for each candidate at the first iteration. - 'smallest' is a heuristic that sets `r0` to a small value: - ``n_splits * 2`` when ``resource='n_samples'`` for a regression problem - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a classification problem - ``1`` when ``resource != 'n_samples'`` - 'exhaust' will set `r0` such that the **last** iteration uses as much resources as possible. Namely, the last iteration will use the highest value smaller than ``max_resources`` that is a multiple of both ``min_resources`` and ``factor``. In general, using 'exhaust' leads to a more accurate estimator, but is slightly more time consuming. 'exhaust' isn't available when `n_candidates='exhaust'`. 
Note that the amount of resources used at each iteration is always a multiple of ``min_resources``." - }, - { - "name": "aggressive_elimination", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This is only relevant in cases where there isn't enough resources to reduce the remaining candidates to at most `factor` after the last iteration. If ``True``, then the search process will 'replay' the first iteration for as long as needed until the number of candidates is small enough. This is ``False`` by default, which means that the last iteration may evaluate more than ``factor`` candidates. See :ref:`aggressive_elimination` for more details." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - integer, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. note:: Due to implementation details, the folds produced by `cv` must be the same across multiple calls to `cv.split()`. For built-in `scikit-learn` iterators, this can be achieved by deactivating shuffling (`shuffle=False`), or by setting the `cv`'s `random_state` parameter to an integer." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. 
If None, the estimator's score method is used." - }, - { - "name": "refit", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, refit an estimator using the best found parameters on the whole dataset. The refitted estimator is made available at the ``best_estimator_`` attribute and permits using ``predict`` directly on this ``HalvingRandomSearchCV`` instance." - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error. Default is ``np.nan``" - }, - { - "name": "return_train_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, the ``cv_results_`` attribute will not include training scores. Computing training scores is used to get insights on how different parameter settings impact the overfitting/underfitting trade-off. However computing the scores on the training set can be computationally expensive and is not strictly required to select the parameters that yield the best generalization performance." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo random number generator state used for subsampling the dataset when `resources != 'n_samples'`. Also used for random uniform sampling from lists of possible values instead of scipy.stats distributions. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity: the higher, the more messages." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_generate_candidate_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Randomized search on hyper parameters.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using more\nand more resources.\n\nThe candidates are sampled at random from the parameter space and the\nnumber of sampled candidates is determined by ``n_candidates``.\n\nRead more in the :ref:`User guide`.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. 
To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingRandomSearchCV\n\nParameters\n----------\nestimator : estimator object.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\nparam_distributions : dict\n Dictionary with parameters names (string) as keys and distributions\n or lists of parameters to try. Distributions must provide a ``rvs``\n method for sampling (such as those from scipy.stats.distributions).\n If a list is given, it is sampled uniformly.\n\nn_candidates : int, default='exhaust'\n The number of candidate parameters to sample, at the first\n iteration. Using 'exhaust' will sample enough candidates so that the\n last iteration uses as many resources as possible, based on\n `min_resources`, `max_resources` and `factor`. In this case,\n `min_resources` cannot be 'exhaust'.\n\nfactor : int or float, default=3\n The 'halving' parameter, which determines the proportion of candidates\n that are selected for each subsequent iteration. For example,\n ``factor=3`` means that only one third of the candidates are selected.\n\nresource : ``'n_samples'`` or str, default='n_samples'\n Defines the resource that increases with each iteration. By default,\n the resource is the number of samples. It can also be set to any\n parameter of the base estimator that accepts positive integer\n values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n boosting estimator. In this case ``max_resources`` cannot be 'auto'\n and must be set explicitly.\n\nmax_resources : int, default='auto'\n The maximum number of resources that any candidate is allowed to use\n for a given iteration. 
By default, this is set ``n_samples`` when\n ``resource='n_samples'`` (default), else an error is raised.\n\nmin_resources : {'exhaust', 'smallest'} or int, default='smallest'\n The minimum amount of resource that any candidate is allowed to use\n for a given iteration. Equivalently, this defines the amount of\n resources `r0` that are allocated for each candidate at the first\n iteration.\n\n - 'smallest' is a heuristic that sets `r0` to a small value:\n - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n problem\n - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n classification problem\n - ``1`` when ``resource != 'n_samples'``\n - 'exhaust' will set `r0` such that the **last** iteration uses as\n much resources as possible. Namely, the last iteration will use the\n highest value smaller than ``max_resources`` that is a multiple of\n both ``min_resources`` and ``factor``. In general, using 'exhaust'\n leads to a more accurate estimator, but is slightly more time\n consuming. 'exhaust' isn't available when `n_candidates='exhaust'`.\n\n Note that the amount of resources used at each iteration is always a\n multiple of ``min_resources``.\n\naggressive_elimination : bool, default=False\n This is only relevant in cases where there isn't enough resources to\n reduce the remaining candidates to at most `factor` after the last\n iteration. If ``True``, then the search process will 'replay' the\n first iteration for as long as needed until the number of candidates\n is small enough. This is ``False`` by default, which means that the\n last iteration may evaluate more than ``factor`` candidates. 
See\n :ref:`aggressive_elimination` for more details.\n\ncv : int, cross-validation generator or an iterable, default=5\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. note::\n Due to implementation details, the folds produced by `cv` must be\n the same across multiple calls to `cv.split()`. For\n built-in `scikit-learn` iterators, this can be achieved by\n deactivating shuffling (`shuffle=False`), or by setting the\n `cv`'s `random_state` parameter to an integer.\n\nscoring : string, callable, or None, default=None\n A single string (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n If None, the estimator's score method is used.\n\nrefit : bool, default=True\n If True, refit an estimator using the best found parameters on the\n whole dataset.\n\n The refitted estimator is made available at the ``best_estimator_``\n attribute and permits using ``predict`` directly on this\n ``HalvingRandomSearchCV`` instance.\n\nerror_score : 'raise' or numeric\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error. 
Default is ``np.nan``\n\nreturn_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for subsampling the dataset\n when `resources != 'n_samples'`. Also used for random uniform\n sampling from lists of possible values instead of scipy.stats\n distributions.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_jobs : int or None, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int\n Controls the verbosity: the higher, the more messages.\n\nAttributes\n----------\nn_resources_ : list of int\n The amount of resources used at each iteration.\n\nn_candidates_ : list of int\n The number of candidate parameters that were evaluated at each\n iteration.\n\nn_remaining_candidates_ : int\n The number of candidate parameters that are left after the last\n iteration. It corresponds to `ceil(n_candidates[-1] / factor)`\n\nmax_resources_ : int\n The maximum number of resources that any candidate is allowed to use\n for a given iteration. 
Note that since the number of resources used at\n each iteration must be a multiple of ``min_resources_``, the actual\n number of resources used at the last iteration may be smaller than\n ``max_resources_``.\n\nmin_resources_ : int\n The amount of resources that are allocated for each candidate at the\n first iteration.\n\nn_iterations_ : int\n The actual number of iterations that were run. This is equal to\n ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n Else, this is equal to ``min(n_possible_iterations_,\n n_required_iterations_)``.\n\nn_possible_iterations_ : int\n The number of iterations that are possible starting with\n ``min_resources_`` resources and without exceeding\n ``max_resources_``.\n\nn_required_iterations_ : int\n The number of iterations that are required to end up with less than\n ``factor`` candidates at the last iteration, starting with\n ``min_resources_`` resources. This will be smaller than\n ``n_possible_iterations_`` when there isn't enough resources.\n\ncv_results_ : dict of numpy (masked) ndarrays\n A dict with keys as column headers and values as columns, that can be\n imported into a pandas ``DataFrame``. It contains many informations for\n analysing the results of a search.\n Please refer to the :ref:`User guide`\n for details.\n\nbest_estimator_ : estimator or dict\n Estimator that was chosen by the search, i.e. estimator\n which gave highest score (or smallest loss if specified)\n on the left out data. 
Not available if ``refit=False``.\n\nbest_score_ : float\n Mean cross-validated score of the best_estimator.\n\nbest_params_ : dict\n Parameter setting that gave the best results on the hold out data.\n\nbest_index_ : int\n The index (of the ``cv_results_`` arrays) which corresponds to the best\n candidate parameter setting.\n\n The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n the parameter setting for the best model, that gives the highest\n mean score (``search.best_score_``).\n\nscorer_ : function or a dict\n Scorer function used on the held out data to choose the best\n parameters for the model.\n\nn_splits_ : int\n The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n Seconds used for refitting the best model on the whole dataset.\n\n This is present only if ``refit`` is not False.\n\nSee Also\n--------\n:class:`HalvingGridSearchCV`:\n Search over a grid of parameters using successive halving.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the held-out\ndata, according to the scoring parameter.\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.experimental import enable_halving_search_cv # noqa\n>>> from sklearn.model_selection import HalvingRandomSearchCV\n>>> from scipy.stats import randint\n...\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = RandomForestClassifier(random_state=0)\n>>> np.random.seed(0)\n...\n>>> param_distributions = {\"max_depth\": [3, None],\n... \"min_samples_split\": randint(2, 11)}\n>>> search = HalvingRandomSearchCV(clf, param_distributions,\n... resource='n_estimators',\n... max_resources=10,\n... 
random_state=0).fit(X, y)\n>>> search.best_params_ # doctest: +SKIP\n{'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}" - } - ], - "functions": [ - { - "name": "_refit_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_top_k", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.model_selection._split", - "imports": [ - "from collections.abc import Iterable", - "import warnings", - "from itertools import chain", - "from itertools import combinations", - "from math import ceil", - "from math import floor", - "import numbers", - "from abc import ABCMeta", - "from abc import abstractmethod", - "from inspect import signature", - "import numpy as np", - "from scipy.special import comb", - "from utils import indexable", - "from utils import check_random_state", - "from utils import _safe_indexing", - "from utils import _approximate_mode", - "from utils.validation import _num_samples", - "from utils.validation import column_or_1d", - "from utils.validation import check_array", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import type_of_target", - "from base import _pprint" - ], - "classes": [ - { - "name": "BaseCrossValidator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - }, - { - "name": "_iter_test_masks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generates boolean masks corresponding to test sets.\n\nBy default, delegates to _iter_test_indices(X, y, groups)" - }, - { - "name": "_iter_test_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generates integer indices corresponding to test sets." 
- }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for all cross-validators\n\nImplementations must define `_iter_test_masks` or `_iter_test_indices`." - }, - { - "name": "LeaveOneOut", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_iter_test_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator." - } - ], - "docstring": "Leave-One-Out cross-validator\n\nProvides train/test indices to split data in train/test sets. Each\nsample is used once as a test set (singleton) while the remaining\nsamples form the training set.\n\nNote: ``LeaveOneOut()`` is equivalent to ``KFold(n_splits=n)`` and\n``LeavePOut(p=1)`` where ``n`` is the number of samples.\n\nDue to the high number of test sets (which is the same as the\nnumber of samples) this cross-validation method can be very costly.\nFor large datasets one should favor :class:`KFold`, :class:`ShuffleSplit`\nor :class:`StratifiedKFold`.\n\nRead more in the :ref:`User Guide `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeaveOneOut\n>>> X = np.array([[1, 2], [3, 4]])\n>>> y = np.array([1, 2])\n>>> loo = LeaveOneOut()\n>>> loo.get_n_splits(X)\n2\n>>> print(loo)\nLeaveOneOut()\n>>> for train_index, test_index in loo.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\n... print(X_train, X_test, y_train, y_test)\nTRAIN: [1] TEST: [0]\n[[3 4]] [[1 2]] [2] [1]\nTRAIN: [0] TEST: [1]\n[[1 2]] [[3 4]] [1] [2]\n\nSee Also\n--------\nLeaveOneGroupOut : For splitting the data according to explicit,\n domain-specific stratification of the dataset.\nGroupKFold : K-fold iterator variant with non-overlapping groups." 
- }, - { - "name": "LeavePOut", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "p", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Size of the test sets. Must be strictly less than the number of samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter_test_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility." - } - ], - "docstring": "Leave-P-Out cross-validator\n\nProvides train/test indices to split data in train/test sets. 
This results\nin testing on all distinct samples of size p, while the remaining n - p\nsamples form the training set in each iteration.\n\nNote: ``LeavePOut(p)`` is NOT equivalent to\n``KFold(n_splits=n_samples // p)`` which creates non-overlapping test sets.\n\nDue to the high number of iterations which grows combinatorically with the\nnumber of samples this cross-validation method can be very costly. For\nlarge datasets one should favor :class:`KFold`, :class:`StratifiedKFold`\nor :class:`ShuffleSplit`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\np : int\n Size of the test sets. Must be strictly less than the number of\n samples.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeavePOut\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n>>> y = np.array([1, 2, 3, 4])\n>>> lpo = LeavePOut(2)\n>>> lpo.get_n_splits(X)\n6\n>>> print(lpo)\nLeavePOut(p=2)\n>>> for train_index, test_index in lpo.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [2 3] TEST: [0 1]\nTRAIN: [1 3] TEST: [0 2]\nTRAIN: [1 2] TEST: [0 3]\nTRAIN: [0 3] TEST: [1 2]\nTRAIN: [0 2] TEST: [1 3]\nTRAIN: [0 1] TEST: [2 3]" - }, - { - "name": "_BaseKFold", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator." - } - ], - "docstring": "Base class for KFold, GroupKFold, and StratifiedKFold" - }, - { - "name": "KFold", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of folds. Must be at least 2. .. versionchanged:: 0.22 ``n_splits`` default value changed from 3 to 5." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle the data before splitting into batches. Note that the samples within each split will not be shuffled." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "When `shuffle` is True, `random_state` affects the ordering of the indices, which controls the randomness of each fold. Otherwise, this parameter has no effect. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter_test_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "K-Folds cross-validator\n\nProvides train/test indices to split data in train/test sets. 
Split\ndataset into k consecutive folds (without shuffling by default).\n\nEach fold is then used once as a validation while the k - 1 remaining\nfolds form the training set.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of folds. Must be at least 2.\n\n .. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5.\n\nshuffle : bool, default=False\n Whether to shuffle the data before splitting into batches.\n Note that the samples within each split will not be shuffled.\n\nrandom_state : int, RandomState instance or None, default=None\n When `shuffle` is True, `random_state` affects the ordering of the\n indices, which controls the randomness of each fold. Otherwise, this\n parameter has no effect.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import KFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([1, 2, 3, 4])\n>>> kf = KFold(n_splits=2)\n>>> kf.get_n_splits(X)\n2\n>>> print(kf)\nKFold(n_splits=2, random_state=None, shuffle=False)\n>>> for train_index, test_index in kf.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [2 3] TEST: [0 1]\nTRAIN: [0 1] TEST: [2 3]\n\nNotes\n-----\nThe first ``n_samples % n_splits`` folds have size\n``n_samples // n_splits + 1``, other folds have size\n``n_samples // n_splits``, where ``n_samples`` is the number of samples.\n\nRandomized CV splitters may return different results for each call of\nsplit. 
You can make the results identical by setting `random_state`\nto an integer.\n\nSee Also\n--------\nStratifiedKFold : Takes group information into account to avoid building\n folds with imbalanced class distributions (for binary or multiclass\n classification tasks).\n\nGroupKFold : K-fold iterator variant with non-overlapping groups.\n\nRepeatedKFold : Repeats K-Fold n times." - }, - { - "name": "GroupKFold", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of folds. Must be at least 2. .. versionchanged:: 0.22 ``n_splits`` default value changed from 3 to 5." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter_test_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - } - ], - "docstring": "K-fold iterator variant with non-overlapping groups.\n\nThe same group will not appear in two different folds (the number of\ndistinct groups has to be at least equal to the number of folds).\n\nThe folds are approximately balanced in the sense that the number of\ndistinct groups is approximately the same in each fold.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of folds. Must be at least 2.\n\n .. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import GroupKFold\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n>>> y = np.array([1, 2, 3, 4])\n>>> groups = np.array([0, 0, 2, 2])\n>>> group_kfold = GroupKFold(n_splits=2)\n>>> group_kfold.get_n_splits(X, y, groups)\n2\n>>> print(group_kfold)\nGroupKFold(n_splits=2)\n>>> for train_index, test_index in group_kfold.split(X, y, groups):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\n... 
print(X_train, X_test, y_train, y_test)\n...\nTRAIN: [0 1] TEST: [2 3]\n[[1 2]\n [3 4]] [[5 6]\n [7 8]] [1 2] [3 4]\nTRAIN: [2 3] TEST: [0 1]\n[[5 6]\n [7 8]] [[1 2]\n [3 4]] [3 4] [1 2]\n\nSee Also\n--------\nLeaveOneGroupOut : For splitting the data according to explicit\n domain-specific stratification of the dataset." - }, - { - "name": "StratifiedKFold", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of folds. Must be at least 2. .. versionchanged:: 0.22 ``n_splits`` default value changed from 3 to 5." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle each class's samples before splitting into batches. Note that the samples within each split will not be shuffled." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "When `shuffle` is True, `random_state` affects the ordering of the indices, which controls the randomness of each fold for each class. Otherwise, leave `random_state` as `None`. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_test_folds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter_test_masks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features. Note that providing ``y`` is sufficient to generate the splits and hence ``np.zeros(n_samples)`` may be used as a placeholder for ``X`` instead of actual training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems. Stratification is done based on the y labels." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Note that providing ``y`` is sufficient to generate the splits and\n hence ``np.zeros(n_samples)`` may be used as a placeholder for\n ``X`` instead of actual training data.\n\ny : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n Stratification is done based on the y labels.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer." - } - ], - "docstring": "Stratified K-Folds cross-validator.\n\nProvides train/test indices to split data in train/test sets.\n\nThis cross-validation object is a variation of KFold that returns\nstratified folds. The folds are made by preserving the percentage of\nsamples for each class.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of folds. Must be at least 2.\n\n .. 
versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5.\n\nshuffle : bool, default=False\n Whether to shuffle each class's samples before splitting into batches.\n Note that the samples within each split will not be shuffled.\n\nrandom_state : int, RandomState instance or None, default=None\n When `shuffle` is True, `random_state` affects the ordering of the\n indices, which controls the randomness of each fold for each class.\n Otherwise, leave `random_state` as `None`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import StratifiedKFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> skf = StratifiedKFold(n_splits=2)\n>>> skf.get_n_splits(X, y)\n2\n>>> print(skf)\nStratifiedKFold(n_splits=2, random_state=None, shuffle=False)\n>>> for train_index, test_index in skf.split(X, y):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [1 3] TEST: [0 2]\nTRAIN: [0 2] TEST: [1 3]\n\nNotes\n-----\nThe implementation is designed to:\n\n* Generate test sets such that all contain the same distribution of\n classes, or as close as possible.\n* Be invariant to class label: relabelling ``y = [\"Happy\", \"Sad\"]`` to\n ``y = [1, 0]`` should not change the indices generated.\n* Preserve order dependencies in the dataset ordering, when\n ``shuffle=False``: all samples from class k in some test set were\n contiguous in y, or separated in y by samples from classes other than k.\n* Generate test sets where the smallest and largest differ by at most one\n sample.\n\n.. versionchanged:: 0.22\n The previous implementation did not follow the last constraint.\n\nSee Also\n--------\nRepeatedStratifiedKFold : Repeats Stratified K-Fold n times." 
- }, - { - "name": "TimeSeriesSplit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of splits. Must be at least 2. .. versionchanged:: 0.22 ``n_splits`` default value changed from 3 to 5." - }, - { - "name": "max_train_size", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum size for a single training set." - }, - { - "name": "test_size", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to limit the size of the test set. Defaults to ``n_samples // (n_splits + 1)``, which is the maximum allowed value with ``gap=0``. .. versionadded:: 0.24" - }, - { - "name": "gap", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to exclude from the end of each train set before the test set. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Always ignored, exists for compatibility.\n\ngroups : array-like of shape (n_samples,)\n Always ignored, exists for compatibility.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - } - ], - "docstring": "Time Series cross-validator\n\nProvides train/test indices to split time series data samples\nthat are observed at fixed time intervals, in train/test sets.\nIn each split, test indices must be higher than before, and thus shuffling\nin cross validator is inappropriate.\n\nThis cross-validation object is a variation of :class:`KFold`.\nIn the kth split, it returns first k folds as train set and the\n(k+1)th fold as test set.\n\nNote that unlike standard cross-validation methods, successive\ntraining sets are supersets of those that come before them.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nn_splits : int, default=5\n Number of splits. Must be at least 2.\n\n .. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5.\n\nmax_train_size : int, default=None\n Maximum size for a single training set.\n\ntest_size : int, default=None\n Used to limit the size of the test set. Defaults to\n ``n_samples // (n_splits + 1)``, which is the maximum allowed value\n with ``gap=0``.\n\n .. versionadded:: 0.24\n\ngap : int, default=0\n Number of samples to exclude from the end of each train set before\n the test set.\n\n .. 
versionadded:: 0.24\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import TimeSeriesSplit\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> tscv = TimeSeriesSplit()\n>>> print(tscv)\nTimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None)\n>>> for train_index, test_index in tscv.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [0] TEST: [1]\nTRAIN: [0 1] TEST: [2]\nTRAIN: [0 1 2] TEST: [3]\nTRAIN: [0 1 2 3] TEST: [4]\nTRAIN: [0 1 2 3 4] TEST: [5]\n>>> # Fix test_size to 2 with 12 samples\n>>> X = np.random.randn(12, 2)\n>>> y = np.random.randint(0, 2, 12)\n>>> tscv = TimeSeriesSplit(n_splits=3, test_size=2)\n>>> for train_index, test_index in tscv.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [0 1 2 3 4 5] TEST: [6 7]\nTRAIN: [0 1 2 3 4 5 6 7] TEST: [8 9]\nTRAIN: [0 1 2 3 4 5 6 7 8 9] TEST: [10 11]\n>>> # Add in a 2 period gap\n>>> tscv = TimeSeriesSplit(n_splits=3, test_size=2, gap=2)\n>>> for train_index, test_index in tscv.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [0 1 2 3] TEST: [6 7]\nTRAIN: [0 1 2 3 4 5] TEST: [8 9]\nTRAIN: [0 1 2 3 4 5 6 7] TEST: [10 11]\n\nNotes\n-----\nThe training set has size ``i * n_samples // (n_splits + 1)\n+ n_samples % (n_splits + 1)`` in the ``i`` th split,\nwith a test set of size ``n_samples//(n_splits + 1)`` by default,\nwhere ``n_samples`` is the number of samples." 
- }, - { - "name": "LeaveOneGroupOut", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_iter_test_masks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. This 'groups' parameter must always be specified to calculate the number of splits, though the other parameters can be omitted." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set. This 'groups' parameter must always be specified to\n calculate the number of splits, though the other parameters can be\n omitted.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator." 
- }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - } - ], - "docstring": "Leave One Group Out cross-validator\n\nProvides train/test indices to split data according to a third-party\nprovided group. 
This group information can be used to encode arbitrary\ndomain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nRead more in the :ref:`User Guide `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeaveOneGroupOut\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n>>> y = np.array([1, 2, 1, 2])\n>>> groups = np.array([1, 1, 2, 2])\n>>> logo = LeaveOneGroupOut()\n>>> logo.get_n_splits(X, y, groups)\n2\n>>> logo.get_n_splits(groups=groups) # 'groups' is always required\n2\n>>> print(logo)\nLeaveOneGroupOut()\n>>> for train_index, test_index in logo.split(X, y, groups):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\n... print(X_train, X_test, y_train, y_test)\nTRAIN: [2 3] TEST: [0 1]\n[[5 6]\n [7 8]] [[1 2]\n [3 4]] [1 2] [1 2]\nTRAIN: [0 1] TEST: [2 3]\n[[1 2]\n [3 4]] [[5 6]\n [7 8]] [1 2] [1 2]" - }, - { - "name": "LeavePGroupsOut", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_groups", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of groups (``p``) to leave out in the test split." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter_test_masks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. This 'groups' parameter must always be specified to calculate the number of splits, though the other parameters can be omitted." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set. This 'groups' parameter must always be specified to\n calculate the number of splits, though the other parameters can be\n omitted.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator." - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - } - ], - "docstring": "Leave P Group(s) Out cross-validator\n\nProvides train/test indices to split data according to a third-party\nprovided group. This group information can be used to encode arbitrary\ndomain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nThe difference between LeavePGroupsOut and LeaveOneGroupOut is that\nthe former builds the test sets with all the samples assigned to\n``p`` different values of the groups while the latter uses samples\nall assigned the same groups.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_groups : int\n Number of groups (``p``) to leave out in the test split.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeavePGroupsOut\n>>> X = np.array([[1, 2], [3, 4], [5, 6]])\n>>> y = np.array([1, 2, 1])\n>>> groups = np.array([1, 2, 3])\n>>> lpgo = LeavePGroupsOut(n_groups=2)\n>>> lpgo.get_n_splits(X, y, groups)\n3\n>>> lpgo.get_n_splits(groups=groups) # 'groups' is always required\n3\n>>> print(lpgo)\nLeavePGroupsOut(n_groups=2)\n>>> for train_index, test_index in lpgo.split(X, y, groups):\n... 
print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\n... print(X_train, X_test, y_train, y_test)\nTRAIN: [2] TEST: [0 1]\n[[5 6]] [[1 2]\n [3 4]] [1] [1 2]\nTRAIN: [1] TEST: [0 2]\n[[3 4]] [[1 2]\n [5 6]] [2] [1 1]\nTRAIN: [0] TEST: [1 2]\n[[1 2]] [[3 4]\n [5 6]] [1] [2 1]\n\nSee Also\n--------\nGroupKFold : K-fold iterator variant with non-overlapping groups." - }, - { - "name": "_RepeatedSplits", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "cv", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Cross-validator class." - }, - { - "name": "n_repeats", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of times cross-validator needs to be repeated." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Passes `random_state` to the arbitrary repeating cross validator. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "**cvargs", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Constructor parameters for cv. Must not contain random_state and shuffle." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generates indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility. ``np.zeros(n_samples)`` may be used as a placeholder." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility. ``np.zeros(n_samples)`` may be used as a placeholder." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n ``np.zeros(n_samples)`` may be used as a placeholder.\n\ny : object\n Always ignored, exists for compatibility.\n ``np.zeros(n_samples)`` may be used as a placeholder.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator." - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Repeated splits for an arbitrary randomized CV splitter.\n\nRepeats splits for cross-validators n times with different randomization\nin each repetition.\n\nParameters\n----------\ncv : callable\n Cross-validator class.\n\nn_repeats : int, default=10\n Number of times cross-validator needs to be repeated.\n\nrandom_state : int, RandomState instance or None, default=None\n Passes `random_state` to the arbitrary repeating cross validator.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n**cvargs : additional params\n Constructor parameters for cv. Must not contain random_state\n and shuffle." - }, - { - "name": "RepeatedKFold", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of folds. Must be at least 2." - }, - { - "name": "n_repeats", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of times cross-validator needs to be repeated." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of each repeated cross-validation instance. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Repeated K-Fold cross validator.\n\nRepeats K-Fold n times with different randomization in each repetition.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of folds. Must be at least 2.\n\nn_repeats : int, default=10\n Number of times cross-validator needs to be repeated.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of each repeated cross-validation instance.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import RepeatedKFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=2652124)\n>>> for train_index, test_index in rkf.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\n...\nTRAIN: [0 1] TEST: [2 3]\nTRAIN: [2 3] TEST: [0 1]\nTRAIN: [1 2] TEST: [0 3]\nTRAIN: [0 3] TEST: [1 2]\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer.\n\nSee Also\n--------\nRepeatedStratifiedKFold : Repeats Stratified K-Fold n times." 
- }, - { - "name": "RepeatedStratifiedKFold", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of folds. Must be at least 2." - }, - { - "name": "n_repeats", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of times cross-validator needs to be repeated." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the generation of the random states for each repetition. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Repeated Stratified K-Fold cross validator.\n\nRepeats Stratified K-Fold n times with different randomization in each\nrepetition.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of folds. Must be at least 2.\n\nn_repeats : int, default=10\n Number of times cross-validator needs to be repeated.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the generation of the random states for each repetition.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import RepeatedStratifiedKFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> rskf = RepeatedStratifiedKFold(n_splits=2, n_repeats=2,\n... random_state=36851234)\n>>> for train_index, test_index in rskf.split(X, y):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... 
X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\n...\nTRAIN: [1 2] TEST: [0 3]\nTRAIN: [0 3] TEST: [1 2]\nTRAIN: [1 3] TEST: [0 2]\nTRAIN: [0 2] TEST: [1 3]\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer.\n\nSee Also\n--------\nRepeatedKFold : Repeats K-Fold n times." - }, - { - "name": "BaseShuffleSplit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer." - }, - { - "name": "_iter_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate (train, test) indices" - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator." - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for ShuffleSplit and StratifiedShuffleSplit" - }, - { - "name": "ShuffleSplit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of re-shuffling & splitting iterations." - }, - { - "name": "test_size", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split. If int, represents the absolute number of test samples. If None, the value is set to the complement of the train size. If ``train_size`` is also None, it will be set to 0.1." - }, - { - "name": "train_size", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the train split. If int, represents the absolute number of train samples. If None, the value is automatically set to the complement of the test size." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of the training and testing indices produced. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Random permutation cross-validator\n\nYields indices to split data into training and test sets.\n\nNote: contrary to other cross-validation strategies, random splits\ndo not guarantee that all folds will be different, although this is\nstill very likely for sizeable datasets.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=10\n Number of re-shuffling & splitting iterations.\n\ntest_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to include in the test split. If int, represents the\n absolute number of test samples. If None, the value is set to the\n complement of the train size. If ``train_size`` is also None, it will\n be set to 0.1.\n\ntrain_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the\n proportion of the dataset to include in the train split. If\n int, represents the absolute number of train samples. 
If None,\n the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the training and testing indices produced.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import ShuffleSplit\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [3, 4], [5, 6]])\n>>> y = np.array([1, 2, 1, 2, 1, 2])\n>>> rs = ShuffleSplit(n_splits=5, test_size=.25, random_state=0)\n>>> rs.get_n_splits(X)\n5\n>>> print(rs)\nShuffleSplit(n_splits=5, random_state=0, test_size=0.25, train_size=None)\n>>> for train_index, test_index in rs.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\nTRAIN: [1 3 0 4] TEST: [5 2]\nTRAIN: [4 0 2 5] TEST: [1 3]\nTRAIN: [1 2 4 0] TEST: [3 5]\nTRAIN: [3 4 1 0] TEST: [5 2]\nTRAIN: [3 5 1 0] TEST: [2 4]\n>>> rs = ShuffleSplit(n_splits=5, train_size=0.5, test_size=.25,\n... random_state=0)\n>>> for train_index, test_index in rs.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\nTRAIN: [1 3 0] TEST: [5 2]\nTRAIN: [4 0 2] TEST: [1 3]\nTRAIN: [1 2 4] TEST: [3 5]\nTRAIN: [3 4 1] TEST: [5 2]\nTRAIN: [3 5 1] TEST: [2 4]" - }, - { - "name": "GroupShuffleSplit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of re-shuffling & splitting iterations." - }, - { - "name": "test_size", - "type": "Union[int, float]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "If float, should be between 0.0 and 1.0 and represent the proportion of groups to include in the test split (rounded up). If int, represents the absolute number of test groups. 
If None, the value is set to the complement of the train size. The default will change in version 0.21. It will remain 0.2 only if ``train_size`` is unspecified, otherwise it will complement the specified ``train_size``." - }, - { - "name": "train_size", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If float, should be between 0.0 and 1.0 and represent the proportion of the groups to include in the train split. If int, represents the absolute number of train groups. If None, the value is automatically set to the complement of the test size." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of the training and testing indices produced. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer." - } - ], - "docstring": "Shuffle-Group(s)-Out cross-validation iterator\n\nProvides randomized train/test indices to split data according to a\nthird-party provided group. 
This group information can be used to encode\narbitrary domain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nThe difference between LeavePGroupsOut and GroupShuffleSplit is that\nthe former generates splits using all subsets of size ``p`` unique groups,\nwhereas GroupShuffleSplit generates a user-determined number of random\ntest splits, each with a user-determined fraction of unique groups.\n\nFor example, a less computationally intensive alternative to\n``LeavePGroupsOut(p=10)`` would be\n``GroupShuffleSplit(test_size=10, n_splits=100)``.\n\nNote: The parameters ``test_size`` and ``train_size`` refer to groups, and\nnot to samples, as in ShuffleSplit.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of re-shuffling & splitting iterations.\n\ntest_size : float, int, default=0.2\n If float, should be between 0.0 and 1.0 and represent the proportion\n of groups to include in the test split (rounded up). If int,\n represents the absolute number of test groups. If None, the value is\n set to the complement of the train size.\n The default will change in version 0.21. It will remain 0.2 only\n if ``train_size`` is unspecified, otherwise it will complement\n the specified ``train_size``.\n\ntrain_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the\n proportion of the groups to include in the train split. If\n int, represents the absolute number of train groups. 
If None,\n the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the training and testing indices produced.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import GroupShuffleSplit\n>>> X = np.ones(shape=(8, 2))\n>>> y = np.ones(shape=(8, 1))\n>>> groups = np.array([1, 1, 2, 2, 2, 3, 3, 3])\n>>> print(groups.shape)\n(8,)\n>>> gss = GroupShuffleSplit(n_splits=2, train_size=.7, random_state=42)\n>>> gss.get_n_splits()\n2\n>>> for train_idx, test_idx in gss.split(X, y, groups):\n... print(\"TRAIN:\", train_idx, \"TEST:\", test_idx)\nTRAIN: [2 3 4 5 6 7] TEST: [0 1]\nTRAIN: [0 1 5 6 7] TEST: [2 3 4]" - }, - { - "name": "StratifiedShuffleSplit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of re-shuffling & splitting iterations." - }, - { - "name": "test_size", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split. If int, represents the absolute number of test samples. If None, the value is set to the complement of the train size. If ``train_size`` is also None, it will be set to 0.1." - }, - { - "name": "train_size", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the train split. If int, represents the absolute number of train samples. 
If None, the value is automatically set to the complement of the test size." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of the training and testing indices produced. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features. Note that providing ``y`` is sufficient to generate the splits and hence ``np.zeros(n_samples)`` may be used as a placeholder for ``X`` instead of actual training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems. Stratification is done based on the y labels." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Note that providing ``y`` is sufficient to generate the splits and\n hence ``np.zeros(n_samples)`` may be used as a placeholder for\n ``X`` instead of actual training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_labels)\n The target variable for supervised learning problems.\n Stratification is done based on the y labels.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer." - } - ], - "docstring": "Stratified ShuffleSplit cross-validator\n\nProvides train/test indices to split data in train/test sets.\n\nThis cross-validation object is a merge of StratifiedKFold and\nShuffleSplit, which returns stratified randomized folds. The folds\nare made by preserving the percentage of samples for each class.\n\nNote: like the ShuffleSplit strategy, stratified random splits\ndo not guarantee that all folds will be different, although this is\nstill very likely for sizeable datasets.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=10\n Number of re-shuffling & splitting iterations.\n\ntest_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to include in the test split. If int, represents the\n absolute number of test samples. If None, the value is set to the\n complement of the train size. 
If ``train_size`` is also None, it will\n be set to 0.1.\n\ntrain_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the\n proportion of the dataset to include in the train split. If\n int, represents the absolute number of train samples. If None,\n the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the training and testing indices produced.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import StratifiedShuffleSplit\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 0, 1, 1, 1])\n>>> sss = StratifiedShuffleSplit(n_splits=5, test_size=0.5, random_state=0)\n>>> sss.get_n_splits(X, y)\n5\n>>> print(sss)\nStratifiedShuffleSplit(n_splits=5, random_state=0, ...)\n>>> for train_index, test_index in sss.split(X, y):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [5 2 3] TEST: [4 1 0]\nTRAIN: [5 1 4] TEST: [0 2 3]\nTRAIN: [5 0 2] TEST: [4 3 1]\nTRAIN: [4 1 0] TEST: [2 3 5]\nTRAIN: [0 5 1] TEST: [3 4 2]" - }, - { - "name": "PredefinedSplit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "test_fold", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The entry ``test_fold[i]`` represents the index of the test set that sample ``i`` belongs to. It is possible to exclude sample ``i`` from any test set (i.e. include sample ``i`` in every training set) by setting ``test_fold[i]`` equal to -1." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - }, - { - "name": "_iter_test_masks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generates boolean masks corresponding to test sets." - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator." - } - ], - "docstring": "Predefined split cross-validator\n\nProvides train/test indices to split data into train/test sets using a\npredefined scheme specified by the user with the ``test_fold`` parameter.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16\n\nParameters\n----------\ntest_fold : array-like of shape (n_samples,)\n The entry ``test_fold[i]`` represents the index of the test set that\n sample ``i`` belongs to. It is possible to exclude sample ``i`` from\n any test set (i.e. include sample ``i`` in every training set) by\n setting ``test_fold[i]`` equal to -1.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import PredefinedSplit\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> test_fold = [0, 1, -1, 1]\n>>> ps = PredefinedSplit(test_fold)\n>>> ps.get_n_splits()\n2\n>>> print(ps)\nPredefinedSplit(test_fold=array([ 0, 1, -1, 1]))\n>>> for train_index, test_index in ps.split():\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... 
y_train, y_test = y[train_index], y[test_index]\nTRAIN: [1 2 3] TEST: [0]\nTRAIN: [0 2] TEST: [1 3]" - }, - { - "name": "_CVIterableWrapper", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator." - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - } - ], - "docstring": "Wrapper class for old style cv objects and iterables." - } - ], - "functions": [ - { - "name": "_validate_shuffle_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validation helper to check if the test/test sizes are meaningful wrt to the\nsize of the data (n_samples)" - }, - { - "name": "check_cv", - "decorators": [], - "parameters": [ - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if classifier is True and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value changed from 3-fold to 5-fold." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "classifier", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether the task is a classification task, in which case stratified KFold will be used." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Input checker utility for building a cross-validator\n\nParameters\n----------\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n - None, to use the default 5-fold cross validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if classifier is True and ``y`` is either\n binary or multiclass, :class:`StratifiedKFold` is used. In all other\n cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value changed from 3-fold to 5-fold.\n\ny : array-like, default=None\n The target variable for supervised learning problems.\n\nclassifier : bool, default=False\n Whether the task is a classification task, in which case\n stratified KFold will be used.\n\nReturns\n-------\nchecked_cv : a cross-validator instance.\n The return value is a cross-validator which generates the train/test\n splits via the ``split`` method." - }, - { - "name": "train_test_split", - "decorators": [], - "parameters": [ - { - "name": "*arrays", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Allowed inputs are lists, numpy arrays, scipy-sparse matrices or pandas dataframes." 
- }, - { - "name": "test_size", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split. If int, represents the absolute number of test samples. If None, the value is set to the complement of the train size. If ``train_size`` is also None, it will be set to 0.25." - }, - { - "name": "train_size", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the train split. If int, represents the absolute number of train samples. If None, the value is automatically set to the complement of the test size." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the shuffling applied to the data before applying the split. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to shuffle the data before splitting. If shuffle=False then stratify must be None." - }, - { - "name": "stratify", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, data is split in a stratified fashion, using this as the class labels. Read more in the :ref:`User Guide `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Split arrays or matrices into random train and test subsets\n\nQuick utility that wraps input validation and\n``next(ShuffleSplit().split(X, y))`` and application to input data\ninto a single call for splitting (and optionally subsampling) data in a\noneliner.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n*arrays : sequence of indexables with same length / shape[0]\n Allowed inputs are lists, numpy arrays, scipy-sparse\n matrices or pandas dataframes.\n\ntest_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to include in the test split. If int, represents the\n absolute number of test samples. If None, the value is set to the\n complement of the train size. If ``train_size`` is also None, it will\n be set to 0.25.\n\ntrain_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the\n proportion of the dataset to include in the train split. If\n int, represents the absolute number of train samples. If None,\n the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the shuffling applied to the data before applying the split.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n\nshuffle : bool, default=True\n Whether or not to shuffle the data before splitting. If shuffle=False\n then stratify must be None.\n\nstratify : array-like, default=None\n If not None, data is split in a stratified fashion, using this as\n the class labels.\n Read more in the :ref:`User Guide `.\n\nReturns\n-------\nsplitting : list, length=2 * len(arrays)\n List containing train-test split of inputs.\n\n .. versionadded:: 0.16\n If the input is sparse, the output will be a\n ``scipy.sparse.csr_matrix``. 
Else, output type is the same as the\n input type.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = np.arange(10).reshape((5, 2)), range(5)\n>>> X\narray([[0, 1],\n [2, 3],\n [4, 5],\n [6, 7],\n [8, 9]])\n>>> list(y)\n[0, 1, 2, 3, 4]\n\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, test_size=0.33, random_state=42)\n...\n>>> X_train\narray([[4, 5],\n [0, 1],\n [6, 7]])\n>>> y_train\n[2, 0, 3]\n>>> X_test\narray([[2, 3],\n [8, 9]])\n>>> y_test\n[1, 4]\n\n>>> train_test_split(y, shuffle=False)\n[[0, 1, 2], [3, 4]]" - }, - { - "name": "_build_repr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_yields_constant_splits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.model_selection._validation", - "imports": [ - "import warnings", - "import numbers", - "import time", - "from traceback import format_exc", - "from contextlib import suppress", - "import numpy as np", - "import scipy.sparse as sp", - "from joblib import Parallel", - "from joblib import logger", - "from base import is_classifier", - "from base import clone", - "from utils import indexable", - "from utils import check_random_state", - "from utils import _safe_indexing", - "from utils.validation import _check_fit_params", - "from utils.validation import _num_samples", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from utils.metaestimators import _safe_split", - "from metrics import check_scoring", - "from metrics._scorer import _check_multimetric_scoring", - "from metrics._scorer import _MultimetricScorer", - "from exceptions import FitFailedWarning", - "from exceptions import NotFittedError", - "from _split import check_cv", - "from preprocessing import LabelEncoder" - ], - "classes": [], 
- "functions": [ - { - "name": "cross_validate", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The object to use to fit the data." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to fit. Can be for example a list, or an array." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to try to predict in the case of supervised learning." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a \"Group\" :term:`cv` instance (e.g., :class:`GroupKFold`)." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A single str (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. For evaluating multiple metrics, either give a list of (unique) strings or a dict with names as keys and callables as values. NOTE that when using custom scorers, each scorer should return a single value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. See :ref:`multimetric_grid_search` for an example. If None, the estimator's score method is used." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. 
Possible inputs for cv are: - None, to use the default 5-fold cross validation, - int, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. Training the estimator and computing the score are parallelized over the cross-validation splits. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to pass to the fit method of the estimator." - }, - { - "name": "pre_dispatch", - "type": "Union[str, int]", - "hasDefault": true, - "default": "'2*n_jobs'", - "limitation": null, - "ignored": false, - "docstring": "Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. This parameter can be: - None, in which case all the jobs are immediately created and spawned. 
Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A str, giving an expression as a function of n_jobs, as in '2*n_jobs'" - }, - { - "name": "return_train_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to include train scores. Computing training scores is used to get insights on how different parameter settings impact the overfitting/underfitting trade-off. However computing the scores on the training set can be computationally expensive and is not strictly required to select the parameters that yield the best generalization performance. .. versionadded:: 0.19 .. versionchanged:: 0.21 Default value was changed from ``True`` to ``False``" - }, - { - "name": "return_estimator", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the estimators fitted on each split. .. versionadded:: 0.20" - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate metric(s) by cross-validation and also record fit/score times.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\nX : array-like of shape (n_samples, n_features)\n The data to fit. 
Can be for example a list, or an array.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n The target variable to try to predict in the case of\n supervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\nscoring : str, callable, list/tuple, or dict, default=None\n A single str (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n For evaluating multiple metrics, either give a list of (unique) strings\n or a dict with names as keys and callables as values.\n\n NOTE that when using custom scorers, each scorer should return a single\n value. Metric functions returning a list/array of values can be wrapped\n into multiple scorers that return one value each.\n\n See :ref:`multimetric_grid_search` for an example.\n\n If None, the estimator's score method is used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. 
Training the estimator and computing\n the score are parallelized over the cross-validation splits.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n The verbosity level.\n\nfit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\npre_dispatch : int or str, default='2*n_jobs'\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\nreturn_train_score : bool, default=False\n Whether to include train scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\n .. versionadded:: 0.19\n\n .. versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``\n\nreturn_estimator : bool, default=False\n Whether to return the estimators fitted on each split.\n\n .. versionadded:: 0.20\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\n .. 
versionadded:: 0.20\n\nReturns\n-------\nscores : dict of float arrays of shape (n_splits,)\n Array of scores of the estimator for each run of the cross validation.\n\n A dict of arrays containing the score/time arrays for each scorer is\n returned. The possible keys for this ``dict`` are:\n\n ``test_score``\n The score array for test scores on each cv split.\n Suffix ``_score`` in ``test_score`` changes to a specific\n metric like ``test_r2`` or ``test_auc`` if there are\n multiple scoring metrics in the scoring parameter.\n ``train_score``\n The score array for train scores on each cv split.\n Suffix ``_score`` in ``train_score`` changes to a specific\n metric like ``train_r2`` or ``train_auc`` if there are\n multiple scoring metrics in the scoring parameter.\n This is available only if ``return_train_score`` parameter\n is ``True``.\n ``fit_time``\n The time for fitting the estimator on the train\n set for each cv split.\n ``score_time``\n The time for scoring the estimator on the test set for each\n cv split. 
(Note time for scoring on the train set is not\n included even if ``return_train_score`` is set to ``True``\n ``estimator``\n The estimator objects for each cv split.\n This is available only if ``return_estimator`` parameter\n is set to ``True``.\n\nExamples\n--------\n>>> from sklearn import datasets, linear_model\n>>> from sklearn.model_selection import cross_validate\n>>> from sklearn.metrics import make_scorer\n>>> from sklearn.metrics import confusion_matrix\n>>> from sklearn.svm import LinearSVC\n>>> diabetes = datasets.load_diabetes()\n>>> X = diabetes.data[:150]\n>>> y = diabetes.target[:150]\n>>> lasso = linear_model.Lasso()\n\nSingle metric evaluation using ``cross_validate``\n\n>>> cv_results = cross_validate(lasso, X, y, cv=3)\n>>> sorted(cv_results.keys())\n['fit_time', 'score_time', 'test_score']\n>>> cv_results['test_score']\narray([0.33150734, 0.08022311, 0.03531764])\n\nMultiple metric evaluation using ``cross_validate``\n(please refer the ``scoring`` parameter doc for more information)\n\n>>> scores = cross_validate(lasso, X, y, cv=3,\n... scoring=('r2', 'neg_mean_squared_error'),\n... return_train_score=True)\n>>> print(scores['test_neg_mean_squared_error'])\n[-3635.5... -3573.3... -6114.7...]\n>>> print(scores['train_r2'])\n[0.28010158 0.39088426 0.22784852]\n\nSee Also\n---------\ncross_val_score : Run cross-validation for single metric evaluation.\n\ncross_val_predict : Get predictions from each split of cross-validation for\n diagnostic purposes.\n\nsklearn.metrics.make_scorer : Make a scorer from a performance metric or\n loss function." - }, - { - "name": "_insert_error_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Insert error in `results` by replacing them inplace with `error_score`.\n\nThis only applies to multimetric scores because `_fit_and_score` will\nhandle the single metric case." 
- }, - { - "name": "_normalize_score_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Creates a scoring dictionary based on the type of `scores`" - }, - { - "name": "cross_val_score", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The object to use to fit the data." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to fit. Can be for example a list, or an array." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to try to predict in the case of supervised learning." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a \"Group\" :term:`cv` instance (e.g., :class:`GroupKFold`)." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A str (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)`` which should return only a single value. Similar to :func:`cross_validate` but only a single metric is permitted. If None, the estimator's default scorer (if available) is used." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. 
Possible inputs for cv are: - None, to use the default 5-fold cross validation, - int, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. Training the estimator and computing the score are parallelized over the cross-validation splits. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to pass to the fit method of the estimator." - }, - { - "name": "pre_dispatch", - "type": "Union[str, int]", - "hasDefault": true, - "default": "'2*n_jobs'", - "limitation": null, - "ignored": false, - "docstring": "Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. This parameter can be: - None, in which case all the jobs are immediately created and spawned. 
Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A str, giving an expression as a function of n_jobs, as in '2*n_jobs'" - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate a score by cross-validation\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\nX : array-like of shape (n_samples, n_features)\n The data to fit. Can be for example a list, or an array.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n The target variable to try to predict in the case of\n supervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. 
Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\nscoring : str or callable, default=None\n A str (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)`` which should return only\n a single value.\n\n Similar to :func:`cross_validate`\n but only a single metric is permitted.\n\n If None, the estimator's default scorer (if available) is used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and computing\n the score are parallelized over the cross-validation splits.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n The verbosity level.\n\nfit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\npre_dispatch : int or str, default='2*n_jobs'\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. 
This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\n .. versionadded:: 0.20\n\nReturns\n-------\nscores : ndarray of float of shape=(len(list(cv)),)\n Array of scores of the estimator for each run of the cross validation.\n\nExamples\n--------\n>>> from sklearn import datasets, linear_model\n>>> from sklearn.model_selection import cross_val_score\n>>> diabetes = datasets.load_diabetes()\n>>> X = diabetes.data[:150]\n>>> y = diabetes.target[:150]\n>>> lasso = linear_model.Lasso()\n>>> print(cross_val_score(lasso, X, y, cv=3))\n[0.33150734 0.08022311 0.03531764]\n\nSee Also\n---------\ncross_validate : To run cross-validation on multiple metrics and also to\n return train scores, fit times and score times.\n\ncross_val_predict : Get predictions from each split of cross-validation for\n diagnostic purposes.\n\nsklearn.metrics.make_scorer : Make a scorer from a performance metric or\n loss function." - }, - { - "name": "_fit_and_score", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The object to use to fit the data." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to fit." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to try to predict in the case of supervised learning." - }, - { - "name": "scorer", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If it is a single callable, the return value for ``train_scores`` and ``test_scores`` is a single float. For a dict, it should be one mapping the scorer name to the scorer callable object / function. The callable object / fn should have signature ``scorer(estimator, X, y)``." - }, - { - "name": "train", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of training samples." - }, - { - "name": "test", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of test samples." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised." - }, - { - "name": "parameters", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to be set on the estimator." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters that will be passed to ``estimator.fit``." 
- }, - { - "name": "return_train_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Compute and return score on training set." - }, - { - "name": "return_parameters", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Return parameters that has been used for the estimator." - }, - { - "name": "split_progress", - "type": "Union[List, Tuple[]]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A list or tuple of format (, )." - }, - { - "name": "candidate_progress", - "type": "Union[List, Tuple[]]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A list or tuple of format (, )." - }, - { - "name": "return_n_test_samples", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the ``n_test_samples``." - }, - { - "name": "return_times", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the fit/score times." - }, - { - "name": "return_estimator", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the fitted estimator." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit estimator and compute scores for a given dataset split.\n\nParameters\n----------\nestimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\nX : array-like of shape (n_samples, n_features)\n The data to fit.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n The target variable to try to predict in the case of\n supervised learning.\n\nscorer : A single callable or dict mapping scorer name to the callable\n If it is a single callable, the return value for ``train_scores`` and\n ``test_scores`` is a single float.\n\n For a dict, it should be one mapping the scorer name to the scorer\n callable object / function.\n\n The callable object / fn should have signature\n ``scorer(estimator, X, y)``.\n\ntrain : array-like of shape (n_train_samples,)\n Indices of training samples.\n\ntest : array-like of shape (n_test_samples,)\n Indices of test samples.\n\nverbose : int\n The verbosity level.\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\nparameters : dict or None\n Parameters to be set on the estimator.\n\nfit_params : dict or None\n Parameters that will be passed to ``estimator.fit``.\n\nreturn_train_score : bool, default=False\n Compute and return score on training set.\n\nreturn_parameters : bool, default=False\n Return parameters that has been used for the estimator.\n\nsplit_progress : {list, tuple} of int, default=None\n A list or tuple of format (, ).\n\ncandidate_progress : {list, tuple} of int, default=None\n A list or tuple of format\n (, ).\n\nreturn_n_test_samples : bool, default=False\n Whether to return the ``n_test_samples``.\n\nreturn_times : bool, default=False\n Whether to return the fit/score times.\n\nreturn_estimator : bool, default=False\n 
Whether to return the fitted estimator.\n\nReturns\n-------\nresult : dict with the following attributes\n train_scores : dict of scorer name -> float\n Score on training set (for all the scorers),\n returned only if `return_train_score` is `True`.\n test_scores : dict of scorer name -> float\n Score on testing set (for all the scorers).\n n_test_samples : int\n Number of test samples.\n fit_time : float\n Time spent for fitting in seconds.\n score_time : float\n Time spent for scoring in seconds.\n parameters : dict or None\n The parameters that have been evaluated.\n estimator : estimator object\n The fitted estimator.\n fit_failed : bool\n The estimator failed to fit." - }, - { - "name": "_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the score(s) of an estimator on a given test set.\n\nWill return a dict of floats if `scorer` is a dict, otherwise a single\nfloat is returned." - }, - { - "name": "cross_val_predict", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The object to use to fit the data." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to fit. Can be, for example a list, or an array at least 2d." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to try to predict in the case of supervised learning." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a \"Group\" :term:`cv` instance (e.g., :class:`GroupKFold`)." 
- }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - int, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. Training the estimator and predicting are parallelized over the cross-validation splits. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to pass to the fit method of the estimator." - }, - { - "name": "pre_dispatch", - "type": "Union[str, int]", - "hasDefault": true, - "default": "'2*n_jobs'", - "limitation": null, - "ignored": false, - "docstring": "Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. 
This parameter can be: - None, in which case all the jobs are immediately created and spawned. Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A str, giving an expression as a function of n_jobs, as in '2*n_jobs'" - }, - { - "name": "method", - "type": "Literal['predict', 'predict_proba', 'predict_log_proba', 'decision_function']", - "hasDefault": true, - "default": "'predict'", - "limitation": null, - "ignored": false, - "docstring": "The method to be invoked by `estimator`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate cross-validated estimates for each input data point\n\nThe data is split according to the cv parameter. Each sample belongs\nto exactly one test set, and its prediction is computed with an\nestimator fitted on the corresponding training set.\n\nPassing these predictions into an evaluation metric may not be a valid\nway to measure generalization performance. Results can differ from\n:func:`cross_validate` and :func:`cross_val_score` unless all tests sets\nhave equal size and the metric decomposes over samples.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object implementing 'fit' and 'predict'\n The object to use to fit the data.\n\nX : array-like of shape (n_samples, n_features)\n The data to fit. Can be, for example a list, or an array at least 2d.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n The target variable to try to predict in the case of\n supervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. 
Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and\n predicting are parallelized over the cross-validation splits.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n The verbosity level.\n\nfit_params : dict, defualt=None\n Parameters to pass to the fit method of the estimator.\n\npre_dispatch : int or str, default='2*n_jobs'\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. 
Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\nmethod : {'predict', 'predict_proba', 'predict_log_proba', 'decision_function'}, default='predict'\n The method to be invoked by `estimator`.\n\nReturns\n-------\npredictions : ndarray\n This is the result of calling `method`. Shape:\n\n - When `method` is 'predict' and in special case where `method` is\n 'decision_function' and the target is binary: (n_samples,)\n - When `method` is one of {'predict_proba', 'predict_log_proba',\n 'decision_function'} (unless special case above):\n (n_samples, n_classes)\n - If `estimator` is :term:`multioutput`, an extra dimension\n 'n_outputs' is added to the end of each shape above.\n\nSee Also\n--------\ncross_val_score : Calculate score for each CV split.\ncross_validate : Calculate one or more scores and timings for each CV\n split.\n\nNotes\n-----\nIn the case that one or more classes are absent in a training portion, a\ndefault score needs to be assigned to all instances for that class if\n``method`` produces columns per class, as in {'decision_function',\n'predict_proba', 'predict_log_proba'}. For ``predict_proba`` this value is\n0. 
In order to ensure finite output, we approximate negative infinity by\nthe minimum finite float value for the dtype in other cases.\n\nExamples\n--------\n>>> from sklearn import datasets, linear_model\n>>> from sklearn.model_selection import cross_val_predict\n>>> diabetes = datasets.load_diabetes()\n>>> X = diabetes.data[:150]\n>>> y = diabetes.target[:150]\n>>> lasso = linear_model.Lasso()\n>>> y_pred = cross_val_predict(lasso, X, y, cv=3)" - }, - { - "name": "_fit_and_predict", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The object to use to fit the data." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to fit. .. versionchanged:: 0.20 X is only required to be an object with finite length or shape now" - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to try to predict in the case of supervised learning." - }, - { - "name": "train", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of training samples." - }, - { - "name": "test", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of test samples." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters that will be passed to ``estimator.fit``." 
- }, - { - "name": "method", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Invokes the passed method name of the passed estimator." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit estimator and predict values for a given dataset split.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object implementing 'fit' and 'predict'\n The object to use to fit the data.\n\nX : array-like of shape (n_samples, n_features)\n The data to fit.\n\n .. versionchanged:: 0.20\n X is only required to be an object with finite length or shape now\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n The target variable to try to predict in the case of\n supervised learning.\n\ntrain : array-like of shape (n_train_samples,)\n Indices of training samples.\n\ntest : array-like of shape (n_test_samples,)\n Indices of test samples.\n\nverbose : int\n The verbosity level.\n\nfit_params : dict or None\n Parameters that will be passed to ``estimator.fit``.\n\nmethod : str\n Invokes the passed method name of the passed estimator.\n\nReturns\n-------\npredictions : sequence\n Result of calling 'estimator.method'" - }, - { - "name": "_enforce_prediction_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ensure that prediction arrays have correct column order\n\nWhen doing cross-validation, if one or more classes are\nnot present in the subset of data used for training,\nthen the output prediction array might not have the same\ncolumns as other folds. Use the list of class names\n(assumed to be ints) to enforce the correct column order.\n\nNote that `classes` is the list of classes in this fold\n(a subset of the classes in the full training set)\nand `n_classes` is the number of classes in the full training set." 
- }, - { - "name": "_check_is_permutation", - "decorators": [], - "parameters": [ - { - "name": "indices", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "int array to test" - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "number of expected elements" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check whether indices is a reordering of the array np.arange(n_samples)\n\nParameters\n----------\nindices : ndarray\n int array to test\nn_samples : int\n number of expected elements\n\nReturns\n-------\nis_partition : bool\n True iff sorted(indices) is np.arange(n)" - }, - { - "name": "permutation_test_score", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The object to use to fit the data." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to fit." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to try to predict in the case of supervised learning." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Labels to constrain permutation within groups, i.e. ``y`` values are permuted among samples with the same group identifier. When not specified, ``y`` values are permuted among all samples. When a grouped cross-validator is used, the group labels are also passed on to the ``split`` method of the cross-validator. The cross-validator uses them for grouping the samples while splitting the dataset into train/test set." 
- }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A single str (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. If None the estimator's score method is used." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - int, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "n_permutations", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of times to permute ``y``." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. Training the estimator and computing the cross-validated score are parallelized over the permutations. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Pass an int for reproducible output for permutation of ``y`` values among samples. 
See :term:`Glossary `." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to pass to the fit method of the estimator. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate the significance of a cross-validated score with permutations\n\nPermutes targets to generate 'randomized data' and compute the empirical\np-value against the null hypothesis that features and targets are\nindependent.\n\nThe p-value represents the fraction of randomized data sets where the\nestimator performed as well or better than in the original data. A small\np-value suggests that there is a real dependency between features and\ntargets which has been used by the estimator to give good predictions.\nA large p-value may be due to lack of real dependency between features\nand targets or the estimator was not able to use the dependency to\ngive good predictions.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\nX : array-like of shape at least 2D\n The data to fit.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n The target variable to try to predict in the case of\n supervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Labels to constrain permutation within groups, i.e. ``y`` values\n are permuted among samples with the same group identifier.\n When not specified, ``y`` values are permuted among all samples.\n\n When a grouped cross-validator is used, the group labels are\n also passed on to the ``split`` method of the cross-validator. 
The\n cross-validator uses them for grouping the samples while splitting\n the dataset into train/test set.\n\nscoring : str or callable, default=None\n A single str (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n If None the estimator's score method is used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_permutations : int, default=100\n Number of times to permute ``y``.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and computing\n the cross-validated score are parallelized over the permutations.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance or None, default=0\n Pass an int for reproducible output for permutation of\n ``y`` values among samples. See :term:`Glossary `.\n\nverbose : int, default=0\n The verbosity level.\n\nfit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\n .. 
versionadded:: 0.24\n\nReturns\n-------\nscore : float\n The true score without permuting targets.\n\npermutation_scores : array of shape (n_permutations,)\n The scores obtained for each permutations.\n\npvalue : float\n The p-value, which approximates the probability that the score would\n be obtained by chance. This is calculated as:\n\n `(C + 1) / (n_permutations + 1)`\n\n Where C is the number of permutations whose score >= the true score.\n\n The best possible p-value is 1/(n_permutations + 1), the worst is 1.0.\n\nNotes\n-----\nThis function implements Test 1 in:\n\n Ojala and Garriga. `Permutation Tests for Studying Classifier\n Performance\n `_. The\n Journal of Machine Learning Research (2010) vol. 11" - }, - { - "name": "_permutation_test_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Auxiliary function for permutation_test_score" - }, - { - "name": "_shuffle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return a shuffled copy of y eventually shuffle among same groups." - }, - { - "name": "learning_curve", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An object of that type which is cloned for each validation." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target relative to X for classification or regression; None for unsupervised learning." 
- }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a \"Group\" :term:`cv` instance (e.g., :class:`GroupKFold`)." - }, - { - "name": "train_sizes", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Relative or absolute numbers of training examples that will be used to generate the learning curve. If the dtype is float, it is regarded as a fraction of the maximum size of the training set (that is determined by the selected validation method), i.e. it has to be within (0, 1]. Otherwise it is interpreted as absolute sizes of the training sets. Note that for classification the number of samples usually have to be big enough to contain at least one sample from each class." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - int, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." 
- }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A str (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``." - }, - { - "name": "exploit_incremental_learning", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If the estimator supports incremental learning, this will be used to speed up fitting for different training set sizes." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. Training the estimator and computing the score are parallelized over the different training and test sets. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "pre_dispatch", - "type": "Union[str, int]", - "hasDefault": true, - "default": "'all'", - "limitation": null, - "ignored": false, - "docstring": "Number of predispatched jobs for parallel execution (default is all). The option can reduce the allocated memory. The str can be an expression like '2*n_jobs'." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity: the higher, the more messages." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle training data before taking prefixes of it based on``train_sizes``." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``shuffle`` is True. 
Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. .. versionadded:: 0.20" - }, - { - "name": "return_times", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the fit and score times." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to pass to the fit method of the estimator. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Learning curve.\n\nDetermines cross-validated training and test scores for different training\nset sizes.\n\nA cross-validation generator splits the whole dataset k times in training\nand test data. Subsets of the training set with varying sizes will be used\nto train the estimator and a score for each training subset size and the\ntest set will be computed. 
Afterwards, the scores will be averaged over\nall k runs for each training subset size.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : object type that implements the \"fit\" and \"predict\" methods\n An object of that type which is cloned for each validation.\n\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\ntrain_sizes : array-like of shape (n_ticks,), default=np.linspace(0.1, 1.0, 5)\n Relative or absolute numbers of training examples that will be used to\n generate the learning curve. If the dtype is float, it is regarded as a\n fraction of the maximum size of the training set (that is determined\n by the selected validation method), i.e. it has to be within (0, 1].\n Otherwise it is interpreted as absolute sizes of the training sets.\n Note that for classification the number of samples usually have to\n be big enough to contain at least one sample from each class.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. 
In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nscoring : str or callable, default=None\n A str (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\nexploit_incremental_learning : bool, default=False\n If the estimator supports incremental learning, this will be\n used to speed up fitting for different training set sizes.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and computing\n the score are parallelized over the different training and test sets.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\npre_dispatch : int or str, default='all'\n Number of predispatched jobs for parallel execution (default is\n all). The option can reduce the allocated memory. The str can\n be an expression like '2*n_jobs'.\n\nverbose : int, default=0\n Controls the verbosity: the higher, the more messages.\n\nshuffle : bool, default=False\n Whether to shuffle training data before taking prefixes of it\n based on``train_sizes``.\n\nrandom_state : int, RandomState instance or None, default=None\n Used when ``shuffle`` is True. Pass an int for reproducible\n output across multiple function calls.\n See :term:`Glossary `.\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\n .. versionadded:: 0.20\n\nreturn_times : bool, default=False\n Whether to return the fit and score times.\n\nfit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\n .. 
versionadded:: 0.24\n\nReturns\n-------\ntrain_sizes_abs : array of shape (n_unique_ticks,)\n Numbers of training examples that has been used to generate the\n learning curve. Note that the number of ticks might be less\n than n_ticks because duplicate entries will be removed.\n\ntrain_scores : array of shape (n_ticks, n_cv_folds)\n Scores on training sets.\n\ntest_scores : array of shape (n_ticks, n_cv_folds)\n Scores on test set.\n\nfit_times : array of shape (n_ticks, n_cv_folds)\n Times spent for fitting in seconds. Only present if ``return_times``\n is True.\n\nscore_times : array of shape (n_ticks, n_cv_folds)\n Times spent for scoring in seconds. Only present if ``return_times``\n is True.\n\nNotes\n-----\nSee :ref:`examples/model_selection/plot_learning_curve.py\n`" - }, - { - "name": "_translate_train_sizes", - "decorators": [], - "parameters": [ - { - "name": "train_sizes", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Numbers of training examples that will be used to generate the learning curve. If the dtype is float, it is regarded as a fraction of 'n_max_training_samples', i.e. it has to be within (0, 1]." - }, - { - "name": "n_max_training_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of training samples (upper bound of 'train_sizes')." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Determine absolute sizes of training subsets and validate 'train_sizes'.\n\nExamples:\n _translate_train_sizes([0.5, 1.0], 10) -> [5, 10]\n _translate_train_sizes([5, 10], 10) -> [5, 10]\n\nParameters\n----------\ntrain_sizes : array-like of shape (n_ticks,)\n Numbers of training examples that will be used to generate the\n learning curve. If the dtype is float, it is regarded as a\n fraction of 'n_max_training_samples', i.e. 
it has to be within (0, 1].\n\nn_max_training_samples : int\n Maximum number of training samples (upper bound of 'train_sizes').\n\nReturns\n-------\ntrain_sizes_abs : array of shape (n_unique_ticks,)\n Numbers of training examples that will be used to generate the\n learning curve. Note that the number of ticks might be less\n than n_ticks because duplicate entries will be removed." - }, - { - "name": "_incremental_fit_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Train estimator on training subsets incrementally and compute scores." - }, - { - "name": "validation_curve", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An object of that type which is cloned for each validation." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target relative to X for classification or regression; None for unsupervised learning." - }, - { - "name": "param_name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of the parameter that will be varied." - }, - { - "name": "param_range", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The values of the parameter that will be evaluated." 
- }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a \"Group\" :term:`cv` instance (e.g., :class:`GroupKFold`)." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - int, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A str (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. Training the estimator and computing the score are parallelized over the combinations of each parameter value and each cross-validation split. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- }, - { - "name": "pre_dispatch", - "type": "Union[str, int]", - "hasDefault": true, - "default": "'all'", - "limitation": null, - "ignored": false, - "docstring": "Number of predispatched jobs for parallel execution (default is all). The option can reduce the allocated memory. The str can be an expression like '2*n_jobs'." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity: the higher, the more messages." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to pass to the fit method of the estimator. .. versionadded:: 0.24" - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validation curve.\n\nDetermine training and test scores for varying parameter values.\n\nCompute scores for an estimator with different values of a specified\nparameter. This is similar to grid search with one parameter. 
However, this\nwill also compute training scores and is merely a utility for plotting the\nresults.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : object type that implements the \"fit\" and \"predict\" methods\n An object of that type which is cloned for each validation.\n\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\nparam_name : str\n Name of the parameter that will be varied.\n\nparam_range : array-like of shape (n_values,)\n The values of the parameter that will be evaluated.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. 
versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nscoring : str or callable, default=None\n A str (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and computing\n the score are parallelized over the combinations of each parameter\n value and each cross-validation split.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\npre_dispatch : int or str, default='all'\n Number of predispatched jobs for parallel execution (default is\n all). The option can reduce the allocated memory. The str can\n be an expression like '2*n_jobs'.\n\nverbose : int, default=0\n Controls the verbosity: the higher, the more messages.\n\nfit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\n .. versionadded:: 0.24\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\n .. 
versionadded:: 0.20\n\nReturns\n-------\ntrain_scores : array of shape (n_ticks, n_cv_folds)\n Scores on training sets.\n\ntest_scores : array of shape (n_ticks, n_cv_folds)\n Scores on test set.\n\nNotes\n-----\nSee :ref:`sphx_glr_auto_examples_model_selection_plot_validation_curve.py`" - }, - { - "name": "_aggregate_score_dicts", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Aggregate the list of dict to dict of np ndarray\n\nThe aggregated output of _aggregate_score_dicts will be a list of dict\nof form [{'prec': 0.1, 'acc':1.0}, {'prec': 0.1, 'acc':1.0}, ...]\nConvert it to a dict of array {'prec': np.array([0.1 ...]), ...}\n\nParameters\n----------\n\nscores : list of dict\n List of dicts of the scores for all scorers. This is a flat list,\n assumed originally to be of row major order.\n\nExample\n-------\n\n>>> scores = [{'a': 1, 'b':10}, {'a': 2, 'b':2}, {'a': 3, 'b':3},\n... {'a': 10, 'b': 10}] # doctest: +SKIP\n>>> _aggregate_score_dicts(scores) # doctest: +SKIP\n{'a': array([1, 2, 3, 10]),\n 'b': array([10, 2, 3, 10])}" - } - ] - }, - { - "name": "sklearn.model_selection", - "imports": [ - "import typing", - "from _split import BaseCrossValidator", - "from _split import KFold", - "from _split import GroupKFold", - "from _split import StratifiedKFold", - "from _split import TimeSeriesSplit", - "from _split import LeaveOneGroupOut", - "from _split import LeaveOneOut", - "from _split import LeavePGroupsOut", - "from _split import LeavePOut", - "from _split import RepeatedKFold", - "from _split import RepeatedStratifiedKFold", - "from _split import ShuffleSplit", - "from _split import GroupShuffleSplit", - "from _split import StratifiedShuffleSplit", - "from _split import PredefinedSplit", - "from _split import train_test_split", - "from _split import check_cv", - "from _validation import cross_val_score", - "from _validation import cross_val_predict", - "from _validation import cross_validate", - 
"from _validation import learning_curve", - "from _validation import permutation_test_score", - "from _validation import validation_curve", - "from _search import GridSearchCV", - "from _search import RandomizedSearchCV", - "from _search import ParameterGrid", - "from _search import ParameterSampler", - "from _search import fit_grid_point", - "from _search_successive_halving import HalvingGridSearchCV", - "from _search_successive_halving import HalvingRandomSearchCV" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.model_selection.tests.common", - "imports": [ - "import numpy as np", - "from sklearn.model_selection import KFold" - ], - "classes": [ - { - "name": "OneTimeSplitter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Split can be called only once" - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A wrapper to make KFold single entry cv iterator" - } - ], - "functions": [] - }, - { - "name": "sklearn.model_selection.tests.test_search", - "imports": [ - "from collections.abc import Iterable", - "from collections.abc import Sized", - "from io import StringIO", - "from itertools import chain", - "from itertools import product", - "from functools import partial", - "import pickle", - "import sys", - "from types import GeneratorType", - "import re", - "import numpy as np", - "import scipy.sparse as sp", - "import pytest", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_raise_message", - 
"from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import MinimalClassifier", - "from sklearn.utils._testing import MinimalRegressor", - "from sklearn.utils._testing import MinimalTransformer", - "from sklearn.utils._mocking import CheckingClassifier", - "from sklearn.utils._mocking import MockDataFrame", - "from scipy.stats import bernoulli", - "from scipy.stats import expon", - "from scipy.stats import uniform", - "from sklearn.base import BaseEstimator", - "from sklearn.base import ClassifierMixin", - "from sklearn.base import clone", - "from sklearn.base import is_classifier", - "from sklearn.exceptions import NotFittedError", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_blobs", - "from sklearn.datasets import make_multilabel_classification", - "from sklearn.model_selection import fit_grid_point", - "from sklearn.model_selection import train_test_split", - "from sklearn.model_selection import KFold", - "from sklearn.model_selection import StratifiedKFold", - "from sklearn.model_selection import StratifiedShuffleSplit", - "from sklearn.model_selection import LeaveOneGroupOut", - "from sklearn.model_selection import LeavePGroupsOut", - "from sklearn.model_selection import GroupKFold", - "from sklearn.model_selection import GroupShuffleSplit", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.model_selection import RandomizedSearchCV", - "from sklearn.model_selection import ParameterGrid", - "from sklearn.model_selection import ParameterSampler", - "from sklearn.model_selection._search import BaseSearchCV", - "from sklearn.model_selection._validation import FitFailedWarning", - "from sklearn.svm import LinearSVC", - "from 
sklearn.svm import SVC", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.cluster import KMeans", - "from sklearn.neighbors import KernelDensity", - "from sklearn.neighbors import LocalOutlierFactor", - "from sklearn.neighbors import KNeighborsClassifier", - "from sklearn.metrics import f1_score", - "from sklearn.metrics import recall_score", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import make_scorer", - "from sklearn.metrics import roc_auc_score", - "from sklearn.metrics import confusion_matrix", - "from sklearn.metrics import r2_score", - "from sklearn.metrics.pairwise import euclidean_distances", - "from sklearn.impute import SimpleImputer", - "from sklearn.pipeline import Pipeline", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model import SGDClassifier", - "from sklearn.linear_model import LinearRegression", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from sklearn.ensemble import HistGradientBoostingClassifier", - "from sklearn.model_selection.tests.common import OneTimeSplitter", - "from pandas import Series", - "from pandas import DataFrame" - ], - "classes": [ - { - "name": "MockClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier to test the parameter search algorithms" - }, - { - "name": "LinearSVCNoScore", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "An LinearSVC classifier that has no score method." - }, - { - "name": "BrokenClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Broken classifier that cannot be fit twice" - }, - { - "name": "FailingClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Classifier that raises a ValueError on fit()" - } - ], - "functions": [ - { - "name": "assert_grid_iter_equals_getitem", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_validate_parameter_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parameter_grid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_pipeline_steps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_SearchCV_with_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_no_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_score_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_groups", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classes__property", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_trivial_cv_results_attr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_no_refit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_one_grid_point", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_when_param_grid_includes_range", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_bad_param_grid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_sparse_scoring", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_precomputed_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_precomputed_kernel_error_nonsquare", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_refit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_refit_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test refit=callable, which adds flexibility in identifying the\n\"best\" estimator." 
- }, - { - "name": "test_refit_callable_invalid_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test implementation catches the errors when 'best_index_' returns an\ninvalid result." - }, - { - "name": "test_refit_callable_out_bound", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test implementation catches the errors when 'best_index_' returns an\nout of bound result." - }, - { - "name": "test_refit_callable_multi_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test refit=callable in multiple metric evaluation setting" - }, - { - "name": "test_gridsearch_nd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_X_as_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_y_as_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pandas_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unsupervised_grid_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearch_no_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_param_sampler", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cv_results_array_types", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cv_results_keys", - "decorators": [], 
- "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_cv_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_search_cv_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_default_iid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_cv_results_multimetric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_search_cv_results_multimetric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "compare_cv_results_multimetric_with_single", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compare multi-metric cv_results with the ensemble of multiple\nsingle metric cv_results from single metric grid/random search" - }, - { - "name": "compare_refit_methods_when_refit_with_acc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compare refit multi-metric search methods with single metric methods" - }, - { - "name": "test_search_cv_score_samples_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_cv_score_samples_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_cv_results_rank_tie_breaking", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_search_cv_results_none_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_cv_timing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_correct_score_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_grid_point", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_grid_point_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_with_multioutput_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_proba_disabled", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_allows_nans", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_failing_classifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_failing_classifier_raise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parameters_sampler_replacement", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stochastic_gradient_loss_param", - "decorators": [], 
- "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_train_scores_set_to_false", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_cv_splits_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_inverse_transform_round_trip", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_custom_run_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test__custom_fit_no_run_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_empty_cv_iterator_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_search_bad_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_searchcv_raise_warning_with_non_finite_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_multimetric_confusion_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_multimetric_same_as_list_of_strings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_single_metric_same_as_single_string", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_callable_multimetric_error_on_invalid_key", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_multimetric_error_failing_clf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_multimetric_clf_all_fails", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_cv_pairwise_property_delegated_to_base_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test implementation of BaseSearchCV has the pairwise tag\nwhich matches the pairwise tag of its estimator.\nThis test make sure pairwise tag is delegated to the base estimator.\n\nNon-regression test for issue #13920." - }, - { - "name": "test_search_cv__pairwise_property_delegated_to_base_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test implementation of BaseSearchCV has the _pairwise property\nwhich matches the _pairwise property of its estimator.\nThis test make sure _pairwise is delegated to the base estimator.\n\nNon-regression test for issue #13920." - }, - { - "name": "test_search_cv_pairwise_property_equivalence_of_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test implementation of BaseSearchCV has the pairwise tag\nwhich matches the pairwise tag of its estimator.\nThis test ensures the equivalence of 'precomputed'.\n\nNon-regression test for issue #13920." 
- }, - { - "name": "test_scalar_fit_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scalar_fit_param_compat", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_cv_using_minimal_compatible_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.model_selection.tests.test_split", - "imports": [ - "import warnings", - "import pytest", - "import numpy as np", - "from scipy.sparse import coo_matrix", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from scipy import stats", - "from scipy.special import comb", - "from itertools import combinations", - "from itertools import combinations_with_replacement", - "from itertools import permutations", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regexp", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils.validation import _num_samples", - "from sklearn.utils._mocking import MockDataFrame", - "from sklearn.model_selection import cross_val_score", - "from sklearn.model_selection import KFold", - "from sklearn.model_selection import StratifiedKFold", - "from sklearn.model_selection import GroupKFold", - "from sklearn.model_selection import TimeSeriesSplit", - "from sklearn.model_selection import LeaveOneOut", - "from sklearn.model_selection import LeaveOneGroupOut", - "from sklearn.model_selection import LeavePOut", - "from 
sklearn.model_selection import LeavePGroupsOut", - "from sklearn.model_selection import ShuffleSplit", - "from sklearn.model_selection import GroupShuffleSplit", - "from sklearn.model_selection import StratifiedShuffleSplit", - "from sklearn.model_selection import PredefinedSplit", - "from sklearn.model_selection import check_cv", - "from sklearn.model_selection import train_test_split", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.model_selection import RepeatedKFold", - "from sklearn.model_selection import RepeatedStratifiedKFold", - "from sklearn.linear_model import Ridge", - "from sklearn.model_selection._split import _validate_shuffle_split", - "from sklearn.model_selection._split import _build_repr", - "from sklearn.model_selection._split import _yields_constant_splits", - "from sklearn.datasets import load_digits", - "from sklearn.datasets import make_classification", - "from sklearn.svm import SVC", - "from pandas import DataFrame" - ], - "classes": [], - "functions": [ - { - "name": "test_cross_validator_with_default_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_2d_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_valid_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cv_coverage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kfold_valueerrors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kfold_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kfold_no_shuffle", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_kfold_no_shuffle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_kfold_ratios", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_kfold_label_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kfold_balance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratifiedkfold_balance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle_kfold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle_kfold_stratifiedkfold_reproducibility", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle_stratifiedkfold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kfold_can_detect_dependent_samples_on_digits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle_split_default_test_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_group_shuffle_split_default_test_size", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_shuffle_split_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_shuffle_split_respects_test_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_shuffle_split_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_shuffle_split_even", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_shuffle_split_overlap_train_test_bug", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_shuffle_split_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_shuffle_split_multilabel_many_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predefinedsplit_with_kfold_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_group_shuffle_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_leave_one_p_group_out", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_leave_group_out_changing_groups", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_leave_one_p_group_out_error_on_fewer_number_of_groups", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_repeated_cv_value_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_repeated_cv_repr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_repeated_kfold_determinstic_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_n_splits_for_repeated_kfold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_n_splits_for_repeated_stratified_kfold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_repeated_stratified_kfold_determinstic_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_invalid_sizes1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_invalid_sizes2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_default_test_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_train_test_split_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_mock_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_list_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shufflesplit_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shufflesplit_reproducible", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratifiedshufflesplit_list_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_allow_nans", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cv_iterable_wrapper", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_group_kfold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_time_series_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_time_series_max_train_size", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_time_series_max_train_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_time_series_test_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_time_series_gap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nested_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_build_repr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle_split_empty_trainset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_empty_trainset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_leave_one_out_empty_trainset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_leave_p_out_empty_trainset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_state_shuffle_false", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_yields_constant_splits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.model_selection.tests.test_successive_halving", - "imports": [ - "from math import ceil", - "import pytest", - "from scipy.stats import 
norm", - "from scipy.stats import randint", - "import numpy as np", - "from sklearn.datasets import make_classification", - "from sklearn.dummy import DummyClassifier", - "from sklearn.experimental import enable_halving_search_cv", - "from sklearn.model_selection import HalvingGridSearchCV", - "from sklearn.model_selection import HalvingRandomSearchCV", - "from sklearn.model_selection import KFold", - "from sklearn.model_selection import ShuffleSplit", - "from sklearn.model_selection._search_successive_halving import _SubsampleMetaSplitter", - "from sklearn.model_selection._search_successive_halving import _top_k", - "from sklearn.model_selection._search_successive_halving import _refit_callable" - ], - "classes": [ - { - "name": "FastClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier that accepts parameters a, b, ... z.\n\nThese parameter don't affect the predictions and are useful for fast\ngrid searching." 
- } - ], - "functions": [ - { - "name": "test_aggressive_elimination", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_max_resources", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_iterations", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_resource_parameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_search_discrete_distributions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_errors_randomized", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_subsample_splitter_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_subsample_splitter_determinism", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_top_k", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_refit_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cv_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_base_estimator_inputs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.model_selection.tests.test_validation", - "imports": [ - "import os", - "import re", - "import sys", - "import tempfile", - "import warnings", - "from functools import partial", - "from time import sleep", - "import pytest", - "import numpy as np", - "from scipy.sparse import coo_matrix", - "from scipy.sparse import csr_matrix", - "from sklearn.exceptions import FitFailedWarning", - "from sklearn.model_selection.tests.test_search import FailingClassifier", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._mocking import CheckingClassifier", - "from sklearn.utils._mocking import MockDataFrame", - "from sklearn.utils.validation import _num_samples", - "from sklearn.model_selection import cross_val_score", - "from sklearn.model_selection import ShuffleSplit", - "from sklearn.model_selection import cross_val_predict", - "from sklearn.model_selection import cross_validate", - "from sklearn.model_selection import permutation_test_score", - "from sklearn.model_selection import KFold", - "from sklearn.model_selection import StratifiedKFold", - "from sklearn.model_selection import LeaveOneOut", - "from sklearn.model_selection import LeaveOneGroupOut", - "from sklearn.model_selection import LeavePGroupsOut", - "from sklearn.model_selection import 
GroupKFold", - "from sklearn.model_selection import GroupShuffleSplit", - "from sklearn.model_selection import learning_curve", - "from sklearn.model_selection import validation_curve", - "from sklearn.model_selection._validation import _check_is_permutation", - "from sklearn.model_selection._validation import _fit_and_score", - "from sklearn.model_selection._validation import _score", - "from sklearn.datasets import make_regression", - "from sklearn.datasets import load_diabetes", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import load_digits", - "from sklearn.metrics import explained_variance_score", - "from sklearn.metrics import make_scorer", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import confusion_matrix", - "from sklearn.metrics import precision_recall_fscore_support", - "from sklearn.metrics import precision_score", - "from sklearn.metrics import r2_score", - "from sklearn.metrics import mean_squared_error", - "from sklearn.metrics import check_scoring", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import SGDClassifier", - "from sklearn.linear_model import PassiveAggressiveClassifier", - "from sklearn.linear_model import RidgeClassifier", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.neighbors import KNeighborsClassifier", - "from sklearn.svm import SVC", - "from sklearn.svm import LinearSVC", - "from sklearn.cluster import KMeans", - "from sklearn.impute import SimpleImputer", - "from sklearn.preprocessing import LabelEncoder", - "from sklearn.pipeline import Pipeline", - "from io import StringIO", - "from sklearn.base import BaseEstimator", - "from sklearn.base import clone", - "from sklearn.multiclass import OneVsRestClassifier", - "from sklearn.utils import shuffle", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_multilabel_classification", - 
"from sklearn.model_selection.tests.common import OneTimeSplitter", - "from sklearn.model_selection import GridSearchCV", - "from pandas import Series", - "from pandas import DataFrame" - ], - "classes": [ - { - "name": "MockImprovingEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_training_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier to test the learning curve" - }, - { - "name": "MockIncrementalImprovingEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_training_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier that provides partial_fit" - }, - { - "name": "MockEstimatorWithParameter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_training_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier to test the validation curve" - }, - { - "name": "MockEstimatorWithSingleFitCallAllowed", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier that disallows repeated calls of fit method" - }, - { - "name": "MockClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The dummy arguments are to test that this fit function can\naccept non-array arguments through cross-validation, such as:\n - int\n - str (this is actually array-like)\n - object\n - function" - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier to test the cross-validation" - }, - { - "name": "RFWithDecisionFunction", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_cross_val_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_validate_many_jobs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_validate_invalid_scoring_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_validate_nested_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_validate", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cross_validate_single_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cross_validate_multi_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_predict_groups", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_score_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_with_score_func_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_with_score_func_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_test_score_allow_nans", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_test_score_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_allow_nans", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_decision_function_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_predict_proba_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_predict_log_proba_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_input_types", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_unbalanced", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_y_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_sparse_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_unsupervised", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_incremental_learning_not_possible", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_incremental_learning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_incremental_learning_unsupervised", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_batch_and_incremental_learning_are_equal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_n_sample_range_out_of_bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_remove_duplicate_sample_sizes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_with_boolean_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_with_shuffle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_incremental_learning_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "test_validation_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_validation_curve_clone_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_validation_curve_cv_splits_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_validation_curve_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_is_permutation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_sparse_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cross_val_predict_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper for tests of cross_val_predict with binary classification" - }, - { - "name": "check_cross_val_predict_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper for tests of cross_val_predict with multiclass classification" - }, - { - "name": "check_cross_val_predict_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the output of cross_val_predict for 2D targets using\nEstimators which provide a predictions as a list with one\nelement per class." 
- }, - { - "name": "check_cross_val_predict_with_method_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cross_val_predict_with_method_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_with_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_method_checking", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearchcv_cross_val_predict_with_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_with_method_multilabel_ovr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_with_method_multilabel_rf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_with_method_rare_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_with_method_multilabel_rf_rare_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_expected_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_class_subset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_memmap", - "decorators": [], - "parameters": [], 
- "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_test_score_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_and_score_failing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_and_score_working", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_failing_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_failing_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_validate_failing_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "three_params_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_and_score_verbosity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_multimetric_confusion_matrix_cross_validate", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_validation_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.model_selection.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.neighbors.setup", - "imports": 
[ - "import os", - "import numpy", - "from numpy.distutils.misc_util import Configuration" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors._base", - "imports": [ - "from functools import partial", - "import warnings", - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numbers", - "import numpy as np", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import issparse", - "import joblib", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "from _ball_tree import BallTree", - "from _kd_tree import KDTree", - "from base import BaseEstimator", - "from base import MultiOutputMixin", - "from base import is_classifier", - "from metrics import pairwise_distances_chunked", - "from metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS", - "from utils import check_array", - "from utils import gen_even_slices", - "from utils import _to_object_array", - "from utils.deprecation import deprecated", - "from utils.multiclass import check_classification_targets", - "from utils.validation import check_is_fitted", - "from utils.validation import check_non_negative", - "from utils.fixes import delayed", - "from utils.fixes import parse_version", - "from exceptions import DataConversionWarning", - "from exceptions import EfficiencyWarning" - ], - "classes": [ - { - "name": "NeighborsBase", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_algorithm_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for nearest neighbors estimators." - }, - { - "name": "KNeighborsMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_kneighbors_reduce_func", - "decorators": [], - "parameters": [ - { - "name": "dist", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The distance matrix." - }, - { - "name": "start", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index in X which the first row of dist corresponds to." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors required for each sample." - }, - { - "name": "return_distance", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the distances." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reduce a chunk of distances to the nearest neighbors\n\nCallback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`\n\nParameters\n----------\ndist : ndarray of shape (n_samples_chunk, n_samples)\n The distance matrix.\n\nstart : int\n The index in X which the first row of dist corresponds to.\n\nn_neighbors : int\n Number of neighbors required for each sample.\n\nreturn_distance : bool\n Whether or not to return the distances.\n\nReturns\n-------\ndist : array of shape (n_samples_chunk, n_neighbors)\n Returned only if `return_distance=True`.\n\nneigh : array of shape (n_samples_chunk, n_neighbors)\n The neighbors indices." - }, - { - "name": "kneighbors", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query point or points. If not provided, neighbors of each indexed point are returned. In this case, the query point is not considered its own neighbor." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors required for each sample. The default is the value passed to the constructor." - }, - { - "name": "return_distance", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the distances." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Finds the K-neighbors of a point.\n\nReturns indices of and distances to the neighbors of each point.\n\nParameters\n----------\nX : array-like, shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed', default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n\nn_neighbors : int, default=None\n Number of neighbors required for each sample. The default is the\n value passed to the constructor.\n\nreturn_distance : bool, default=True\n Whether or not to return the distances.\n\nReturns\n-------\nneigh_dist : ndarray of shape (n_queries, n_neighbors)\n Array representing the lengths to points, only present if\n return_distance=True\n\nneigh_ind : ndarray of shape (n_queries, n_neighbors)\n Indices of the nearest points in the population matrix.\n\nExamples\n--------\nIn the following example, we construct a NearestNeighbors\nclass from an array representing our data set and ask who's\nthe closest point to [1,1,1]\n\n>>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(n_neighbors=1)\n>>> neigh.fit(samples)\nNearestNeighbors(n_neighbors=1)\n>>> print(neigh.kneighbors([[1., 1., 1.]]))\n(array([[0.5]]), array([[2]]))\n\nAs you can see, it returns [[0.5]], and [[2]], which means that the\nelement is at distance 0.5 and is the third element of samples\n(indexes start at 0). You can also query for multiple points:\n\n>>> X = [[0., 1., 0.], [1., 0., 1.]]\n>>> neigh.kneighbors(X, return_distance=False)\narray([[1],\n [2]]...)" - }, - { - "name": "kneighbors_graph", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query point or points. 
If not provided, neighbors of each indexed point are returned. In this case, the query point is not considered its own neighbor. For ``metric='precomputed'`` the shape should be (n_queries, n_indexed). Otherwise the shape should be (n_queries, n_features)." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors for each sample. The default is the value passed to the constructor." - }, - { - "name": "mode", - "type": "Literal['connectivity', 'distance']", - "hasDefault": true, - "default": "'connectivity'", - "limitation": null, - "ignored": false, - "docstring": "Type of returned matrix: 'connectivity' will return the connectivity matrix with ones and zeros, in 'distance' the edges are Euclidean distance between points." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Computes the (weighted) graph of k-Neighbors for points in X\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed', default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n For ``metric='precomputed'`` the shape should be\n (n_queries, n_indexed). Otherwise the shape should be\n (n_queries, n_features).\n\nn_neighbors : int, default=None\n Number of neighbors for each sample. 
The default is the value\n passed to the constructor.\n\nmode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the\n connectivity matrix with ones and zeros, in 'distance' the\n edges are Euclidean distance between points.\n\nReturns\n-------\nA : sparse-matrix of shape (n_queries, n_samples_fit)\n `n_samples_fit` is the number of samples in the fitted data\n `A[i, j]` is assigned the weight of edge that connects `i` to `j`.\n The matrix is of CSR format.\n\nExamples\n--------\n>>> X = [[0], [3], [1]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(n_neighbors=2)\n>>> neigh.fit(X)\nNearestNeighbors(n_neighbors=2)\n>>> A = neigh.kneighbors_graph(X)\n>>> A.toarray()\narray([[1., 0., 1.],\n [0., 1., 1.],\n [1., 0., 1.]])\n\nSee Also\n--------\nNearestNeighbors.radius_neighbors_graph" - } - ], - "docstring": "Mixin for k-neighbors searches" - }, - { - "name": "RadiusNeighborsMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_radius_neighbors_reduce_func", - "decorators": [], - "parameters": [ - { - "name": "dist", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The distance matrix." - }, - { - "name": "start", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index in X which the first row of dist corresponds to." - }, - { - "name": "radius", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The radius considered when making the nearest neighbors search." 
- }, - { - "name": "return_distance", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the distances." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reduce a chunk of distances to the nearest neighbors\n\nCallback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`\n\nParameters\n----------\ndist : ndarray of shape (n_samples_chunk, n_samples)\n The distance matrix.\n\nstart : int\n The index in X which the first row of dist corresponds to.\n\nradius : float\n The radius considered when making the nearest neighbors search.\n\nreturn_distance : bool\n Whether or not to return the distances.\n\nReturns\n-------\ndist : list of ndarray of shape (n_samples_chunk,)\n Returned only if `return_distance=True`.\n\nneigh : list of ndarray of shape (n_samples_chunk,)\n The neighbors indices." - }, - { - "name": "radius_neighbors", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query point or points. If not provided, neighbors of each indexed point are returned. In this case, the query point is not considered its own neighbor." - }, - { - "name": "radius", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Limiting distance of neighbors to return. The default is the value passed to the constructor." - }, - { - "name": "return_distance", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the distances." - }, - { - "name": "sort_results", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the distances and indices will be sorted by increasing distances before being returned. 
If False, the results may not be sorted. If `return_distance=False`, setting `sort_results=True` will result in an error. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Finds the neighbors within a given radius of a point or points.\n\nReturn the indices and distances of each point from the dataset\nlying in a ball with size ``radius`` around the points of the query\narray. Points lying on the boundary are included in the results.\n\nThe result points are *not* necessarily sorted by distance to their\nquery point.\n\nParameters\n----------\nX : array-like of (n_samples, n_features), default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n\nradius : float, default=None\n Limiting distance of neighbors to return. The default is the value\n passed to the constructor.\n\nreturn_distance : bool, default=True\n Whether or not to return the distances.\n\nsort_results : bool, default=False\n If True, the distances and indices will be sorted by increasing\n distances before being returned. If False, the results may not\n be sorted. If `return_distance=False`, setting `sort_results=True`\n will result in an error.\n\n .. versionadded:: 0.22\n\nReturns\n-------\nneigh_dist : ndarray of shape (n_samples,) of arrays\n Array representing the distances to each point, only present if\n `return_distance=True`. 
The distance values are computed according\n to the ``metric`` constructor parameter.\n\nneigh_ind : ndarray of shape (n_samples,) of arrays\n An array of arrays of indices of the approximate nearest points\n from the population matrix that lie within a ball of size\n ``radius`` around the query points.\n\nExamples\n--------\nIn the following example, we construct a NeighborsClassifier\nclass from an array representing our data set and ask who's\nthe closest point to [1, 1, 1]:\n\n>>> import numpy as np\n>>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(radius=1.6)\n>>> neigh.fit(samples)\nNearestNeighbors(radius=1.6)\n>>> rng = neigh.radius_neighbors([[1., 1., 1.]])\n>>> print(np.asarray(rng[0][0]))\n[1.5 0.5]\n>>> print(np.asarray(rng[1][0]))\n[1 2]\n\nThe first array returned contains the distances to all points which\nare closer than 1.6, while the second array returned contains their\nindices. In general, multiple points can be queried at the same time.\n\nNotes\n-----\nBecause the number of neighbors of each point is not necessarily\nequal, the results for multiple query points cannot be fit in a\nstandard data array.\nFor efficiency, `radius_neighbors` returns arrays of objects, where\neach object is a 1D array of indices or distances." - }, - { - "name": "radius_neighbors_graph", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query point or points. If not provided, neighbors of each indexed point are returned. In this case, the query point is not considered its own neighbor." - }, - { - "name": "radius", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Radius of neighborhoods. The default is the value passed to the constructor." 
- }, - { - "name": "mode", - "type": "Literal['connectivity', 'distance']", - "hasDefault": true, - "default": "'connectivity'", - "limitation": null, - "ignored": false, - "docstring": "Type of returned matrix: 'connectivity' will return the connectivity matrix with ones and zeros, in 'distance' the edges are Euclidean distance between points." - }, - { - "name": "sort_results", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, in each row of the result, the non-zero entries will be sorted by increasing distances. If False, the non-zero entries may not be sorted. Only used with mode='distance'. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Computes the (weighted) graph of Neighbors for points in X\n\nNeighborhoods are restricted the points at a distance lower than\nradius.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features), default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n\nradius : float, default=None\n Radius of neighborhoods. The default is the value passed to the\n constructor.\n\nmode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the\n connectivity matrix with ones and zeros, in 'distance' the\n edges are Euclidean distance between points.\n\nsort_results : bool, default=False\n If True, in each row of the result, the non-zero entries will be\n sorted by increasing distances. If False, the non-zero entries may\n not be sorted. Only used with mode='distance'.\n\n .. 
versionadded:: 0.22\n\nReturns\n-------\nA : sparse-matrix of shape (n_queries, n_samples_fit)\n `n_samples_fit` is the number of samples in the fitted data\n `A[i, j]` is assigned the weight of edge that connects `i` to `j`.\n The matrix if of format CSR.\n\nExamples\n--------\n>>> X = [[0], [3], [1]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(radius=1.5)\n>>> neigh.fit(X)\nNearestNeighbors(radius=1.5)\n>>> A = neigh.radius_neighbors_graph(X)\n>>> A.toarray()\narray([[1., 0., 1.],\n [0., 1., 0.],\n [1., 0., 1.]])\n\nSee Also\n--------\nkneighbors_graph" - } - ], - "docstring": "Mixin for radius-based neighbors searches" - } - ], - "functions": [ - { - "name": "_check_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check to make sure weights are valid" - }, - { - "name": "_get_weights", - "decorators": [], - "parameters": [ - { - "name": "dist", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input distances." - }, - { - "name": "weights", - "type": "Literal['uniform', 'distance']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The kind of weighting used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get the weights from an array of distances and a parameter ``weights``\n\nParameters\n----------\ndist : ndarray\n The input distances.\n\nweights : {'uniform', 'distance' or a callable}\n The kind of weighting used.\n\nReturns\n-------\nweights_arr : array of the same shape as ``dist``\n If ``weights == 'uniform'``, then returns None." 
- }, - { - "name": "_is_sorted_by_data", - "decorators": [], - "parameters": [ - { - "name": "graph", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Neighbors graph as given by `kneighbors_graph` or `radius_neighbors_graph`. Matrix should be of format CSR format." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the graph's non-zero entries are sorted by data\n\nThe non-zero entries are stored in graph.data and graph.indices.\nFor each row (or sample), the non-zero entries can be either:\n - sorted by indices, as after graph.sort_indices();\n - sorted by data, as after _check_precomputed(graph);\n - not sorted.\n\nParameters\n----------\ngraph : sparse matrix of shape (n_samples, n_samples)\n Neighbors graph as given by `kneighbors_graph` or\n `radius_neighbors_graph`. Matrix should be of format CSR format.\n\nReturns\n-------\nres : bool\n Whether input graph is sorted by data." - }, - { - "name": "_check_precomputed", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Distance matrix to other samples. X may be a sparse matrix, in which case only non-zero elements may be considered neighbors." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check precomputed distance matrix\n\nIf the precomputed distance matrix is sparse, it checks that the non-zero\nentries are sorted by distances. If not, the matrix is copied and sorted.\n\nParameters\n----------\nX : {sparse matrix, array-like}, (n_samples, n_samples)\n Distance matrix to other samples. X may be a sparse matrix, in which\n case only non-zero elements may be considered neighbors.\n\nReturns\n-------\nX : {sparse matrix, array-like}, (n_samples, n_samples)\n Distance matrix to other samples. 
X may be a sparse matrix, in which\n case only non-zero elements may be considered neighbors." - }, - { - "name": "_kneighbors_from_graph", - "decorators": [], - "parameters": [ - { - "name": "graph", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Neighbors graph as given by `kneighbors_graph` or `radius_neighbors_graph`. Matrix should be of format CSR format." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors required for each sample." - }, - { - "name": "return_distance", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the distances." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decompose a nearest neighbors sparse graph into distances and indices\n\nParameters\n----------\ngraph : sparse matrix of shape (n_samples, n_samples)\n Neighbors graph as given by `kneighbors_graph` or\n `radius_neighbors_graph`. Matrix should be of format CSR format.\n\nn_neighbors : int\n Number of neighbors required for each sample.\n\nreturn_distance : bool\n Whether or not to return the distances.\n\nReturns\n-------\nneigh_dist : ndarray of shape (n_samples, n_neighbors)\n Distances to nearest neighbors. Only present if `return_distance=True`.\n\nneigh_ind : ndarray of shape (n_samples, n_neighbors)\n Indices of nearest neighbors." - }, - { - "name": "_radius_neighbors_from_graph", - "decorators": [], - "parameters": [ - { - "name": "graph", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Neighbors graph as given by `kneighbors_graph` or `radius_neighbors_graph`. Matrix should be of format CSR format." 
- }, - { - "name": "radius", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Radius of neighborhoods which should be strictly positive." - }, - { - "name": "return_distance", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the distances." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decompose a nearest neighbors sparse graph into distances and indices\n\nParameters\n----------\ngraph : sparse matrix of shape (n_samples, n_samples)\n Neighbors graph as given by `kneighbors_graph` or\n `radius_neighbors_graph`. Matrix should be of format CSR format.\n\nradius : float\n Radius of neighborhoods which should be strictly positive.\n\nreturn_distance : bool\n Whether or not to return the distances.\n\nReturns\n-------\nneigh_dist : ndarray of shape (n_samples,) of arrays\n Distances to nearest neighbors. Only present if `return_distance=True`.\n\nneigh_ind : ndarray of shape (n_samples,) of arrays\n Indices of nearest neighbors." - }, - { - "name": "_tree_query_parallel_helper", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper for the Parallel calls in KNeighborsMixin.kneighbors\n\nThe Cython method tree.query is not directly picklable by cloudpickle\nunder PyPy." - }, - { - "name": "_tree_query_radius_parallel_helper", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper for the Parallel calls in RadiusNeighborsMixin.radius_neighbors\n\nThe Cython method tree.query_radius is not directly picklable by\ncloudpickle under PyPy." 
- } - ] - }, - { - "name": "sklearn.neighbors._classification", - "imports": [ - "import numpy as np", - "from scipy import stats", - "from utils.extmath import weighted_mode", - "from utils.validation import _is_arraylike", - "from utils.validation import _num_samples", - "import warnings", - "from _base import _check_weights", - "from _base import _get_weights", - "from _base import NeighborsBase", - "from _base import KNeighborsMixin", - "from _base import RadiusNeighborsMixin", - "from base import ClassifierMixin", - "from utils import check_array", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "KNeighborsClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to use by default for :meth:`kneighbors` queries." - }, - { - "name": "weights", - "type": "Literal['uniform', 'distance']", - "hasDefault": true, - "default": "'uniform'", - "limitation": null, - "ignored": false, - "docstring": "weight function used in prediction. Possible values: - 'uniform' : uniform weights. All points in each neighborhood are weighted equally. - 'distance' : weight points by the inverse of their distance. in this case, closer neighbors of a query point will have a greater influence than neighbors which are further away. - [callable] : a user-defined function which accepts an array of distances, and returns an array of the same shape containing the weights." 
- }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "the distance metric to use for the tree. The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean metric. See the documentation of :class:`DistanceMetric` for a list of available metrics. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square during fit. X may be a :term:`sparse graph`, in which case only \"nonzero\" elements may be considered neighbors." 
- }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. Doesn't affect :meth:`fit` method." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the k-nearest neighbors classifier from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nReturns\n-------\nself : KNeighborsClassifier\n The fitted k-nearest neighbors classifier." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the class labels for the provided data.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n Class labels for each data sample." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return probability estimates for the test data X.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\np : ndarray of shape (n_queries, n_classes), or a list of n_outputs\n of such arrays if n_outputs > 1.\n The class probabilities of the input samples. Classes are ordered\n by lexicographic order." - } - ], - "docstring": "Classifier implementing the k-nearest neighbors vote.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_neighbors : int, default=5\n Number of neighbors to use by default for :meth:`kneighbors` queries.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. 
All points in each neighborhood\n are weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\np : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. 
X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n Doesn't affect :meth:`fit` method.\n\nAttributes\n----------\nclasses_ : array of shape (n_classes,)\n Class labels known to the classifier\n\neffective_metric_ : str or callble\n The distance metric used. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\noutputs_2d_ : bool\n False when `y`'s shape is (n_samples, ) or (n_samples, 1) during fit\n otherwise True.\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import KNeighborsClassifier\n>>> neigh = KNeighborsClassifier(n_neighbors=3)\n>>> neigh.fit(X, y)\nKNeighborsClassifier(...)\n>>> print(neigh.predict([[1.1]]))\n[0]\n>>> print(neigh.predict_proba([[0.9]]))\n[[0.66666667 0.33333333]]\n\nSee Also\n--------\nRadiusNeighborsClassifier\nKNeighborsRegressor\nRadiusNeighborsRegressor\nNearestNeighbors\n\nNotes\n-----\nSee :ref:`Nearest Neighbors ` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n.. 
warning::\n\n Regarding the Nearest Neighbors algorithms, if it is found that two\n neighbors, neighbor `k+1` and `k`, have identical distances\n but different labels, the results will depend on the ordering of the\n training data.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm" - }, - { - "name": "RadiusNeighborsClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "radius", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Range of parameter space to use by default for :meth:`radius_neighbors` queries." - }, - { - "name": "weights", - "type": "Literal['uniform', 'distance']", - "hasDefault": true, - "default": "'uniform'", - "limitation": null, - "ignored": false, - "docstring": "weight function used in prediction. Possible values: - 'uniform' : uniform weights. All points in each neighborhood are weighted equally. - 'distance' : weight points by the inverse of their distance. in this case, closer neighbors of a query point will have a greater influence than neighbors which are further away. - [callable] : a user-defined function which accepts an array of distances, and returns an array of the same shape containing the weights. Uniform weights are used by default." - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force." 
- }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "the distance metric to use for the tree. The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean metric. See the documentation of :class:`DistanceMetric` for a list of available metrics. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square during fit. X may be a :term:`sparse graph`, in which case only \"nonzero\" elements may be considered neighbors." - }, - { - "name": "outlier_label", - "type": "Literal['most_frequent']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "label for outlier samples (samples with no neighbors in given radius). - manual label: str or int label (should be the same type as y) or list of manual labels if multi-output is used. - 'most_frequent' : assign the most frequent label of y to outliers. - None : when any outlier is detected, ValueError will be raised." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." 
- }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the radius neighbors classifier from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nReturns\n-------\nself : RadiusNeighborsClassifier\n The fitted radius neighbors classifier." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the class labels for the provided data.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n Class labels for each data sample." 
- }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return probability estimates for the test data X.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\np : ndarray of shape (n_queries, n_classes), or a list of n_outputs\n of such arrays if n_outputs > 1.\n The class probabilities of the input samples. Classes are ordered\n by lexicographic order." - } - ], - "docstring": "Classifier implementing a vote among neighbors within a given radius\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nradius : float, default=1.0\n Range of parameter space to use by default for :meth:`radius_neighbors`\n queries.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. 
All points in each neighborhood\n are weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\n Uniform weights are used by default.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\np : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. 
X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\noutlier_label : {manual label, 'most_frequent'}, default=None\n label for outlier samples (samples with no neighbors in given radius).\n\n - manual label: str or int label (should be the same type as y)\n or list of manual labels if multi-output is used.\n - 'most_frequent' : assign the most frequent label of y to outliers.\n - None : when any outlier is detected, ValueError will be raised.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier.\n\neffective_metric_ : str or callable\n The distance metric used. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. 
For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\noutlier_label_ : int or array-like of shape (n_class,)\n Label which is given for outlier samples (samples with no neighbors\n on given radius).\n\noutputs_2d_ : bool\n False when `y`'s shape is (n_samples, ) or (n_samples, 1) during fit\n otherwise True.\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import RadiusNeighborsClassifier\n>>> neigh = RadiusNeighborsClassifier(radius=1.0)\n>>> neigh.fit(X, y)\nRadiusNeighborsClassifier(...)\n>>> print(neigh.predict([[1.5]]))\n[0]\n>>> print(neigh.predict_proba([[1.0]]))\n[[0.66666667 0.33333333]]\n\nSee Also\n--------\nKNeighborsClassifier\nRadiusNeighborsRegressor\nKNeighborsRegressor\nNearestNeighbors\n\nNotes\n-----\nSee :ref:`Nearest Neighbors ` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm" - } - ], - "functions": [] - }, - { - "name": "sklearn.neighbors._graph", - "imports": [ - "from _base import KNeighborsMixin", - "from _base import RadiusNeighborsMixin", - "from _base import NeighborsBase", - "from _unsupervised import NearestNeighbors", - "from base import TransformerMixin", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "KNeighborsTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "mode", - "type": "Literal['distance', 'connectivity']", - "hasDefault": true, - "default": "'distance'", - "limitation": null, - "ignored": false, - "docstring": "Type of returned matrix: 'connectivity' will return the connectivity matrix 
with ones and zeros, and 'distance' will return the distances between neighbors according to the given metric." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors for each sample in the transformed sparse graph. For compatibility reasons, as each sample is considered as its own neighbor, one extra neighbor will be computed when mode == 'distance'. In this case, the sparse graph contains (n_neighbors + 1) neighbors." - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. If metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays as input and return one value indicating the distance between them. 
This works for Scipy's metrics, but is less efficient than passing the metric name as a string. Distance matrices are not supported. Valid values for metric are: - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'] - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'] See the documentation for scipy.spatial.distance for details on these metrics." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Parameter for the Minkowski metric from sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. If ``-1``, then the number of jobs is set to the number of CPU cores." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the k-nearest neighbors transformer from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\nReturns\n-------\nself : KNeighborsTransformer\n The fitted k-nearest neighbors transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample data." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Computes the (weighted) graph of Neighbors for points in X\n\nParameters\n----------\nX : array-like of shape (n_samples_transform, n_features)\n Sample data.\n\nReturns\n-------\nXt : sparse matrix of shape (n_samples_transform, n_samples_fit)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training set." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit to data, then transform it.\n\nFits transformer to X and y with optional parameters fit_params\nand returns a transformed version of X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training set.\n\ny : ignored\n\nReturns\n-------\nXt : sparse matrix of shape (n_samples, n_samples)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transform X into a (weighted) graph of k nearest neighbors\n\nThe transformed data is a sparse graph as returned by kneighbors_graph.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.22\n\nParameters\n----------\nmode : {'distance', 'connectivity'}, default='distance'\n Type of returned matrix: 'connectivity' will return the connectivity\n matrix with ones and zeros, and 'distance' will return the distances\n between neighbors according to the given metric.\n\nn_neighbors : int, default=5\n Number of neighbors for each sample in the transformed sparse graph.\n For compatibility reasons, as each sample is considered as its own\n neighbor, one extra neighbor will be computed when mode == 'distance'.\n In this case, the sparse graph contains (n_neighbors + 1) neighbors.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nmetric : str or callable, default='minkowski'\n metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. 
This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\np : int, default=2\n Parameter for the Minkowski metric from\n sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=1\n The number of parallel jobs to run for neighbors search.\n If ``-1``, then the number of jobs is set to the number of CPU cores.\n\nAttributes\n----------\neffective_metric_ : str or callable\n The distance metric used. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\nExamples\n--------\n>>> from sklearn.manifold import Isomap\n>>> from sklearn.neighbors import KNeighborsTransformer\n>>> from sklearn.pipeline import make_pipeline\n>>> estimator = make_pipeline(\n... KNeighborsTransformer(n_neighbors=5, mode='distance'),\n... 
Isomap(neighbors_algorithm='precomputed'))" - }, - { - "name": "RadiusNeighborsTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "mode", - "type": "Literal['distance', 'connectivity']", - "hasDefault": true, - "default": "'distance'", - "limitation": null, - "ignored": false, - "docstring": "Type of returned matrix: 'connectivity' will return the connectivity matrix with ones and zeros, and 'distance' will return the distances between neighbors according to the given metric." - }, - { - "name": "radius", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Radius of neighborhood in the transformed sparse graph." - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. 
If metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays as input and return one value indicating the distance between them. This works for Scipy's metrics, but is less efficient than passing the metric name as a string. Distance matrices are not supported. Valid values for metric are: - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'] - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'] See the documentation for scipy.spatial.distance for details on these metrics." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Parameter for the Minkowski metric from sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. If ``-1``, then the number of jobs is set to the number of CPU cores." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the radius neighbors transformer from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\nReturns\n-------\nself : RadiusNeighborsTransformer\n The fitted radius neighbors transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample data" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Computes the (weighted) graph of Neighbors for points in X\n\nParameters\n----------\nX : array-like of shape (n_samples_transform, n_features)\n Sample data\n\nReturns\n-------\nXt : sparse matrix of shape (n_samples_transform, n_samples_fit)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training set." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit to data, then transform it.\n\nFits transformer to X and y with optional parameters fit_params\nand returns a transformed version of X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training set.\n\ny : ignored\n\nReturns\n-------\nXt : sparse matrix of shape (n_samples, n_samples)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transform X into a (weighted) graph of neighbors nearer than a radius\n\nThe transformed data is a sparse graph as returned by\nradius_neighbors_graph.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nmode : {'distance', 'connectivity'}, default='distance'\n Type of returned matrix: 'connectivity' will return the connectivity\n matrix with ones and zeros, and 'distance' will return the distances\n between neighbors according to the given metric.\n\nradius : float, default=1.\n Radius of neighborhood in the transformed sparse graph.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. 
This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nmetric : str or callable, default='minkowski'\n metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\np : int, default=2\n Parameter for the Minkowski metric from\n sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=1\n The number of parallel jobs to run for neighbors search.\n If ``-1``, then the number of jobs is set to the number of CPU cores.\n\nAttributes\n----------\neffective_metric_ : str or callable\n The distance metric used. It will be same as the `metric` parameter\n or a synonym of it, e.g. 
'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\nExamples\n--------\n>>> from sklearn.cluster import DBSCAN\n>>> from sklearn.neighbors import RadiusNeighborsTransformer\n>>> from sklearn.pipeline import make_pipeline\n>>> estimator = make_pipeline(\n... RadiusNeighborsTransformer(radius=42.0, mode='distance'),\n... DBSCAN(min_samples=30, metric='precomputed'))" - } - ], - "functions": [ - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the validity of the input parameters" - }, - { - "name": "_query_include_self", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the query based on include_self param" - }, - { - "name": "kneighbors_graph", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample data, in the form of a numpy array or a precomputed :class:`BallTree`." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors for each sample." - }, - { - "name": "mode", - "type": "Literal['connectivity', 'distance']", - "hasDefault": true, - "default": "'connectivity'", - "limitation": null, - "ignored": false, - "docstring": "Type of returned matrix: 'connectivity' will return the connectivity matrix with ones and zeros, and 'distance' will return the distances between neighbors according to the given metric." 
- }, - { - "name": "metric", - "type": "str", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "The distance metric used to calculate the k-Neighbors for each sample point. The DistanceMetric class gives a list of available metrics. The default distance is 'euclidean' ('minkowski' metric with the p param equal to 2.)" - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "additional keyword arguments for the metric function." - }, - { - "name": "include_self", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to mark each sample as the first nearest neighbor to itself. If 'auto', then True is used for mode='connectivity' and False for mode='distance'." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Computes the (weighted) graph of k-Neighbors for points in X\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or BallTree\n Sample data, in the form of a numpy array or a precomputed\n :class:`BallTree`.\n\nn_neighbors : int\n Number of neighbors for each sample.\n\nmode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the connectivity\n matrix with ones and zeros, and 'distance' will return the distances\n between neighbors according to the given metric.\n\nmetric : str, default='minkowski'\n The distance metric used to calculate the k-Neighbors for each sample\n point. The DistanceMetric class gives a list of available metrics.\n The default distance is 'euclidean' ('minkowski' metric with the p\n param equal to 2.)\n\np : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n additional keyword arguments for the metric function.\n\ninclude_self : bool or 'auto', default=False\n Whether or not to mark each sample as the first nearest neighbor to\n itself. If 'auto', then True is used for mode='connectivity' and False\n for mode='distance'.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nReturns\n-------\nA : sparse matrix of shape (n_samples, n_samples)\n Graph where A[i, j] is assigned the weight of edge that\n connects i to j. 
The matrix is of CSR format.\n\nExamples\n--------\n>>> X = [[0], [3], [1]]\n>>> from sklearn.neighbors import kneighbors_graph\n>>> A = kneighbors_graph(X, 2, mode='connectivity', include_self=True)\n>>> A.toarray()\narray([[1., 0., 1.],\n [0., 1., 1.],\n [1., 0., 1.]])\n\nSee Also\n--------\nradius_neighbors_graph" - }, - { - "name": "radius_neighbors_graph", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample data, in the form of a numpy array or a precomputed :class:`BallTree`." - }, - { - "name": "radius", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Radius of neighborhoods." - }, - { - "name": "mode", - "type": "Literal['connectivity', 'distance']", - "hasDefault": true, - "default": "'connectivity'", - "limitation": null, - "ignored": false, - "docstring": "Type of returned matrix: 'connectivity' will return the connectivity matrix with ones and zeros, and 'distance' will return the distances between neighbors according to the given metric." - }, - { - "name": "metric", - "type": "str", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "The distance metric used to calculate the neighbors within a given radius for each sample point. The DistanceMetric class gives a list of available metrics. The default distance is 'euclidean' ('minkowski' metric with the param equal to 2.)" - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." 
- }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "additional keyword arguments for the metric function." - }, - { - "name": "include_self", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to mark each sample as the first nearest neighbor to itself. If 'auto', then True is used for mode='connectivity' and False for mode='distance'." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Computes the (weighted) graph of Neighbors for points in X\n\nNeighborhoods are restricted the points at a distance lower than\nradius.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or BallTree\n Sample data, in the form of a numpy array or a precomputed\n :class:`BallTree`.\n\nradius : float\n Radius of neighborhoods.\n\nmode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the connectivity\n matrix with ones and zeros, and 'distance' will return the distances\n between neighbors according to the given metric.\n\nmetric : str, default='minkowski'\n The distance metric used to calculate the neighbors within a\n given radius for each sample point. The DistanceMetric class\n gives a list of available metrics. The default distance is\n 'euclidean' ('minkowski' metric with the param equal to 2.)\n\np : int, default=2\n Power parameter for the Minkowski metric. 
When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n additional keyword arguments for the metric function.\n\ninclude_self : bool or 'auto', default=False\n Whether or not to mark each sample as the first nearest neighbor to\n itself. If 'auto', then True is used for mode='connectivity' and False\n for mode='distance'.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nReturns\n-------\nA : sparse matrix of shape (n_samples, n_samples)\n Graph where A[i, j] is assigned the weight of edge that connects\n i to j. The matrix is of CSR format.\n\nExamples\n--------\n>>> X = [[0], [3], [1]]\n>>> from sklearn.neighbors import radius_neighbors_graph\n>>> A = radius_neighbors_graph(X, 1.5, mode='connectivity',\n... 
include_self=True)\n>>> A.toarray()\narray([[1., 0., 1.],\n [0., 1., 0.],\n [1., 0., 1.]])\n\nSee Also\n--------\nkneighbors_graph" - } - ] - }, - { - "name": "sklearn.neighbors._kde", - "imports": [ - "import numpy as np", - "from scipy.special import gammainc", - "from base import BaseEstimator", - "from utils import check_array", - "from utils import check_random_state", - "from utils.validation import _check_sample_weight", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.extmath import row_norms", - "from _ball_tree import BallTree", - "from _ball_tree import DTYPE", - "from _kd_tree import KDTree" - ], - "classes": [ - { - "name": "KernelDensity", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "bandwidth", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The bandwidth of the kernel." - }, - { - "name": "algorithm", - "type": "Literal['kd_tree', 'ball_tree', 'auto']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The tree algorithm to use." - }, - { - "name": "kernel", - "type": "Literal['gaussian', 'tophat', 'epanechnikov', 'exponential', 'linear', 'cosine']", - "hasDefault": true, - "default": "'gaussian'", - "limitation": null, - "ignored": false, - "docstring": "The kernel to use." - }, - { - "name": "metric", - "type": "str", - "hasDefault": true, - "default": "'euclidian'", - "limitation": null, - "ignored": false, - "docstring": "The distance metric to use. Note that not all metrics are valid with all algorithms. Refer to the documentation of :class:`BallTree` and :class:`KDTree` for a description of available algorithms. Note that the normalization of the density output is correct only for the Euclidean distance metric. Default is 'euclidean'." 
- }, - { - "name": "atol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The desired absolute tolerance of the result. A larger tolerance will generally lead to faster execution." - }, - { - "name": "rtol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The desired relative tolerance of the result. A larger tolerance will generally lead to faster execution." - }, - { - "name": "breadth_first", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If true (default), use a breadth-first approach to the problem. Otherwise use a depth-first approach." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "40", - "limitation": null, - "ignored": false, - "docstring": "Specify the leaf size of the underlying tree. See :class:`BallTree` or :class:`KDTree` for details." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional parameters to be passed to the tree for use with the metric. For more information, see the documentation of :class:`BallTree` or :class:`KDTree`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_choose_algorithm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of n_features-dimensional data points. Each row corresponds to a single data point." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored. 
This parameter exists only for compatibility with :class:`~sklearn.pipeline.Pipeline`." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of sample weights attached to the data X. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the Kernel Density model on the data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n List of sample weights attached to the data X.\n\n .. versionadded:: 0.20\n\nReturns\n-------\nself : object\n Returns instance of object." - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An array of points to query. Last dimension should match dimension of training data (n_features)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate the log density model on the data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n An array of points to query. Last dimension should match dimension\n of training data (n_features).\n\nReturns\n-------\ndensity : ndarray of shape (n_samples,)\n The array of log(density) evaluations. These are normalized to be\n probability densities, so values will be low for high-dimensional\n data." - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of n_features-dimensional data points. 
Each row corresponds to a single data point." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored. This parameter exists only for compatibility with :class:`~sklearn.pipeline.Pipeline`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the total log probability density under the model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nlogprob : float\n Total log-likelihood of the data in X. This is normalized to be a\n probability density, so the value will be low for high-dimensional\n data." - }, - { - "name": "sample", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to generate." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation used to generate random samples. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate random samples from the model.\n\nCurrently, this is implemented only for gaussian and tophat kernels.\n\nParameters\n----------\nn_samples : int, default=1\n Number of samples to generate.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation used to generate\n random samples. 
Pass an int for reproducible results\n across multiple function calls.\n See :term: `Glossary `.\n\nReturns\n-------\nX : array-like of shape (n_samples, n_features)\n List of samples." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Kernel Density Estimation.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nbandwidth : float, default=1.0\n The bandwidth of the kernel.\n\nalgorithm : {'kd_tree', 'ball_tree', 'auto'}, default='auto'\n The tree algorithm to use.\n\nkernel : {'gaussian', 'tophat', 'epanechnikov', 'exponential', 'linear', 'cosine'}, default='gaussian'\n The kernel to use.\n\nmetric : str, default='euclidian'\n The distance metric to use. Note that not all metrics are\n valid with all algorithms. Refer to the documentation of\n :class:`BallTree` and :class:`KDTree` for a description of\n available algorithms. Note that the normalization of the density\n output is correct only for the Euclidean distance metric. Default\n is 'euclidean'.\n\natol : float, default=0\n The desired absolute tolerance of the result. A larger tolerance will\n generally lead to faster execution.\n\nrtol : float, default=0\n The desired relative tolerance of the result. A larger tolerance will\n generally lead to faster execution.\n\nbreadth_first : bool, default=True\n If true (default), use a breadth-first approach to the problem.\n Otherwise use a depth-first approach.\n\nleaf_size : int, default=40\n Specify the leaf size of the underlying tree. See :class:`BallTree`\n or :class:`KDTree` for details.\n\nmetric_params : dict, default=None\n Additional parameters to be passed to the tree for use with the\n metric. 
For more information, see the documentation of\n :class:`BallTree` or :class:`KDTree`.\n\nAttributes\n----------\ntree_ : ``BinaryTree`` instance\n The tree algorithm for fast generalized N-point problems.\n\nSee Also\n--------\nsklearn.neighbors.KDTree : K-dimensional tree for fast generalized N-point\n problems.\nsklearn.neighbors.BallTree : Ball tree for fast generalized N-point\n problems.\n\nExamples\n--------\nCompute a gaussian kernel density estimate with a fixed bandwidth.\n\n>>> import numpy as np\n>>> rng = np.random.RandomState(42)\n>>> X = rng.random_sample((100, 3))\n>>> kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X)\n>>> log_density = kde.score_samples(X[:3])\n>>> log_density\narray([-1.52955942, -1.51462041, -1.60244657])" - } - ], - "functions": [] - }, - { - "name": "sklearn.neighbors._lof", - "imports": [ - "import numpy as np", - "import warnings", - "from _base import NeighborsBase", - "from _base import KNeighborsMixin", - "from base import OutlierMixin", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils import check_array" - ], - "classes": [ - { - "name": "LocalOutlierFactor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "20", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to use by default for :meth:`kneighbors` queries. If n_neighbors is larger than the number of samples provided, all samples will be used." - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. 
- 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "metric used for the distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square. X may be a sparse matrix, in which case only \"nonzero\" elements may be considered neighbors. If metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays as input and return one value indicating the distance between them. This works for Scipy's metrics, but is less efficient than passing the metric name as a string. 
Valid values for metric are: - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'] - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'] See the documentation for scipy.spatial.distance for details on these metrics: https://docs.scipy.org/doc/scipy/reference/spatial.distance.html" - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Parameter for the Minkowski metric from :func:`sklearn.metrics.pairwise.pairwise_distances`. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "contamination", - "type": "Union[Literal['auto'], float]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The amount of contamination of the data set, i.e. the proportion of outliers in the data set. When fitting this is used to define the threshold on the scores of the samples. - if 'auto', the threshold is determined as in the original paper, - if a float, the contamination should be in the range [0, 0.5]. .. versionchanged:: 0.22 The default value of ``contamination`` changed from 0.1 to ``'auto'``." - }, - { - "name": "novelty", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "By default, LocalOutlierFactor is only meant to be used for outlier detection (novelty=False). 
Set novelty to True if you want to use LocalOutlierFactor for novelty detection. In this case be aware that that you should only use predict, decision_function and score_samples on new unseen data and not on the training set. .. versionadded:: 0.20" - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query sample or samples to compute the Local Outlier Factor w.r.t. to the training samples." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fits the model to the training set X and returns the labels.\n\n**Not available for novelty detection (when novelty is set to True).**\nLabel is 1 for an inlier and -1 for an outlier according to the LOF\nscore and the contamination parameter.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features), default=None\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. to the training samples.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and 1 for inliers." 
- }, - { - "name": "_fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query sample or samples to compute the Local Outlier Factor w.r.t. to the training samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fits the model to the training set X and returns the labels.\n\nLabel is 1 for an inlier and -1 for an outlier according to the LOF\nscore and the contamination parameter.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features), default=None\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. to the training samples.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and 1 for inliers." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the local outlier factor detector from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : LocalOutlierFactor\n The fitted local outlier factor detector." 
- }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query sample or samples to compute the Local Outlier Factor w.r.t. to the training samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n**Only available for novelty detection (when novelty is set to True).**\nThis method allows to generalize prediction to *new observations* (not\nin the training set).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. to the training samples.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and +1 for inliers." - }, - { - "name": "_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query sample or samples to compute the Local Outlier Factor w.r.t. to the training samples. If None, makes prediction on the training data without considering them as their own neighbors." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\nIf X is None, returns the same as fit_predict(X_train).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features), default=None\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. to the training samples. If None, makes prediction on the\n training data without considering them as their own neighbors.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and +1 for inliers." 
- }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query sample or samples to compute the Local Outlier Factor w.r.t. the training samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Shifted opposite of the Local Outlier Factor of X.\n\nBigger is better, i.e. large values correspond to inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe shift offset allows a zero threshold for being an outlier.\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the later in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\nReturns\n-------\nshifted_opposite_lof_scores : ndarray of shape (n_samples,)\n The shifted opposite of the Local Outlier Factor of each input\n samples. The lower, the more abnormal. Negative scores represent\n outliers, positive scores represent inliers." - }, - { - "name": "_decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query sample or samples to compute the Local Outlier Factor w.r.t. the training samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Shifted opposite of the Local Outlier Factor of X.\n\nBigger is better, i.e. 
large values correspond to inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe shift offset allows a zero threshold for being an outlier.\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the later in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\nReturns\n-------\nshifted_opposite_lof_scores : ndarray of shape (n_samples,)\n The shifted opposite of the Local Outlier Factor of each input\n samples. The lower, the more abnormal. Negative scores represent\n outliers, positive scores represent inliers." - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query sample or samples to compute the Local Outlier Factor w.r.t. the training samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Opposite of the Local Outlier Factor of X.\n\nIt is the opposite as bigger is better, i.e. large values correspond\nto inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the later in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.\nThe score_samples on training data is available by considering the\nthe ``negative_outlier_factor_`` attribute.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. 
the training samples.\n\nReturns\n-------\nopposite_lof_scores : ndarray of shape (n_samples,)\n The opposite of the Local Outlier Factor of each input samples.\n The lower, the more abnormal." - }, - { - "name": "_score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query sample or samples to compute the Local Outlier Factor w.r.t. the training samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Opposite of the Local Outlier Factor of X.\n\nIt is the opposite as bigger is better, i.e. large values correspond\nto inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the later in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.\nThe score_samples on training data is available by considering the\nthe ``negative_outlier_factor_`` attribute.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\nReturns\n-------\nopposite_lof_scores : ndarray of shape (n_samples,)\n The opposite of the Local Outlier Factor of each input samples.\n The lower, the more abnormal." - }, - { - "name": "_local_reachability_density", - "decorators": [], - "parameters": [ - { - "name": "distances_X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Distances to the neighbors (in the training samples `self._fit_X`) of each query point to compute the LRD." 
- }, - { - "name": "neighbors_indices", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Neighbors indices (of each query point) among training samples self._fit_X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The local reachability density (LRD)\n\nThe LRD of a sample is the inverse of the average reachability\ndistance of its k-nearest neighbors.\n\nParameters\n----------\ndistances_X : ndarray of shape (n_queries, self.n_neighbors)\n Distances to the neighbors (in the training samples `self._fit_X`)\n of each query point to compute the LRD.\n\nneighbors_indices : ndarray of shape (n_queries, self.n_neighbors)\n Neighbors indices (of each query point) among training samples\n self._fit_X.\n\nReturns\n-------\nlocal_reachability_density : ndarray of shape (n_queries,)\n The local reachability density of each sample." - } - ], - "docstring": "Unsupervised Outlier Detection using Local Outlier Factor (LOF)\n\nThe anomaly score of each sample is called Local Outlier Factor.\nIt measures the local deviation of density of a given sample with\nrespect to its neighbors.\nIt is local in that the anomaly score depends on how isolated the object\nis with respect to the surrounding neighborhood.\nMore precisely, locality is given by k-nearest neighbors, whose distance\nis used to estimate the local density.\nBy comparing the local density of a sample to the local densities of\nits neighbors, one can identify samples that have a substantially lower\ndensity than their neighbors. These are considered outliers.\n\n.. 
versionadded:: 0.19\n\nParameters\n----------\nn_neighbors : int, default=20\n Number of neighbors to use by default for :meth:`kneighbors` queries.\n If n_neighbors is larger than the number of samples provided,\n all samples will be used.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n affect the speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nmetric : str or callable, default='minkowski'\n metric used for the distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square. X may be a sparse matrix, in which case only \"nonzero\"\n elements may be considered neighbors.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. 
This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics:\n https://docs.scipy.org/doc/scipy/reference/spatial.distance.html\n\np : int, default=2\n Parameter for the Minkowski metric from\n :func:`sklearn.metrics.pairwise.pairwise_distances`. When p = 1, this\n is equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\ncontamination : 'auto' or float, default='auto'\n The amount of contamination of the data set, i.e. the proportion\n of outliers in the data set. When fitting this is used to define the\n threshold on the scores of the samples.\n\n - if 'auto', the threshold is determined as in the\n original paper,\n - if a float, the contamination should be in the range [0, 0.5].\n\n .. versionchanged:: 0.22\n The default value of ``contamination`` changed from 0.1\n to ``'auto'``.\n\nnovelty : bool, default=False\n By default, LocalOutlierFactor is only meant to be used for outlier\n detection (novelty=False). Set novelty to True if you want to use\n LocalOutlierFactor for novelty detection. In this case be aware that\n that you should only use predict, decision_function and score_samples\n on new unseen data and not on the training set.\n\n .. 
versionadded:: 0.20\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nnegative_outlier_factor_ : ndarray of shape (n_samples,)\n The opposite LOF of the training samples. The higher, the more normal.\n Inliers tend to have a LOF score close to 1\n (``negative_outlier_factor_`` close to -1), while outliers tend to have\n a larger LOF score.\n\n The local outlier factor (LOF) of a sample captures its\n supposed 'degree of abnormality'.\n It is the average of the ratio of the local reachability density of\n a sample and those of its k-nearest neighbors.\n\nn_neighbors_ : int\n The actual number of neighbors used for :meth:`kneighbors` queries.\n\noffset_ : float\n Offset used to obtain binary labels from the raw scores.\n Observations having a negative_outlier_factor smaller than `offset_`\n are detected as abnormal.\n The offset is set to -1.5 (inliers score around -1), except when a\n contamination parameter different than \"auto\" is provided. In that\n case, the offset is defined in such a way we obtain the expected\n number of outliers in training.\n\n .. versionadded:: 0.20\n\neffective_metric_ : str\n The effective metric used for the distance computation.\n\neffective_metric_params_ : dict\n The effective additional keyword arguments for the metric function.\n\nn_samples_fit_ : int\n It is the number of samples in the fitted data.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.neighbors import LocalOutlierFactor\n>>> X = [[-1.1], [0.2], [101.1], [0.3]]\n>>> clf = LocalOutlierFactor(n_neighbors=2)\n>>> clf.fit_predict(X)\narray([ 1, 1, -1, 1])\n>>> clf.negative_outlier_factor_\narray([ -0.9821..., -1.0370..., -73.3697..., -0.9821...])\n\nReferences\n----------\n.. [1] Breunig, M. M., Kriegel, H. P., Ng, R. T., & Sander, J. 
(2000, May).\n LOF: identifying density-based local outliers. In ACM sigmod record." - } - ], - "functions": [] - }, - { - "name": "sklearn.neighbors._nca", - "imports": [ - "from __future__ import print_function", - "from warnings import warn", - "import numpy as np", - "import sys", - "import time", - "import numbers", - "from scipy.optimize import minimize", - "from utils.extmath import softmax", - "from metrics import pairwise_distances", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from preprocessing import LabelEncoder", - "from decomposition import PCA", - "from utils.multiclass import check_classification_targets", - "from utils.random import check_random_state", - "from utils.validation import check_is_fitted", - "from utils.validation import check_array", - "from utils.validation import check_scalar", - "from utils.validation import _deprecate_positional_args", - "from exceptions import ConvergenceWarning", - "from discriminant_analysis import LinearDiscriminantAnalysis" - ], - "classes": [ - { - "name": "NeighborhoodComponentsAnalysis", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Preferred dimensionality of the projected space. If None it will be set to ``n_features``." - }, - { - "name": "init", - "type": "Literal['auto', 'pca', 'lda', 'identity', 'random']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Initialization of the linear transformation. Possible options are 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape (n_features_a, n_features_b). 'auto' Depending on ``n_components``, the most reasonable initialization will be chosen. If ``n_components <= n_classes`` we use 'lda', as it uses labels information. 
If not, but ``n_components < min(n_features, n_samples)``, we use 'pca', as it projects data in meaningful directions (those of higher variance). Otherwise, we just use 'identity'. 'pca' ``n_components`` principal components of the inputs passed to :meth:`fit` will be used to initialize the transformation. (See :class:`~sklearn.decomposition.PCA`) 'lda' ``min(n_components, n_classes)`` most discriminative components of the inputs passed to :meth:`fit` will be used to initialize the transformation. (If ``n_components > n_classes``, the rest of the components will be zero.) (See :class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) 'identity' If ``n_components`` is strictly smaller than the dimensionality of the inputs passed to :meth:`fit`, the identity matrix will be truncated to the first ``n_components`` rows. 'random' The initial transformation will be a random array of shape `(n_components, n_features)`. Each value is sampled from the standard normal distribution. numpy array n_features_b must match the dimensionality of the inputs passed to :meth:`fit` and n_features_a must be less than or equal to that. If ``n_components`` is not None, n_features_a must match it." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True and :meth:`fit` has been called before, the solution of the previous call to :meth:`fit` is used as the initial linear transformation (``n_components`` and ``init`` will be ignored)." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "50", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations in the optimization." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-5", - "limitation": null, - "ignored": false, - "docstring": "Convergence tolerance for the optimization." 
- }, - { - "name": "callback", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, this function is called after every iteration of the optimizer, taking as arguments the current solution (flattened transformation matrix) and the number of iterations. This might be useful in case one wants to examine or store the transformation found after each iteration." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "If 0, no progress messages will be printed. If 1, progress messages will be printed to stdout. If > 1, progress messages will be printed and the ``disp`` parameter of :func:`scipy.optimize.minimize` will be set to ``verbose - 2``." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A pseudo random number generator object or a seed for it if int. If ``init='random'``, ``random_state`` is used to initialize the random transformation. If ``init='pca'``, ``random_state`` is passed as an argument to PCA when initializing the transformation. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The corresponding training labels." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training samples.\n\ny : array-like of shape (n_samples,)\n The corresponding training labels.\n\nReturns\n-------\nself : object\n returns a trained NeighborhoodComponentsAnalysis model." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data samples." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Applies the learned transformation to the given data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data samples.\n\nReturns\n-------\nX_embedded: ndarray of shape (n_samples, n_components)\n The data samples transformed.\n\nRaises\n------\nNotFittedError\n If :meth:`fit` has not been called before." - }, - { - "name": "_validate_params", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The corresponding training labels." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate parameters as soon as :meth:`fit` is called.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training samples.\n\ny : array-like of shape (n_samples,)\n The corresponding training labels.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The validated training samples.\n\ny : ndarray of shape (n_samples,)\n The validated training labels, encoded to be integers in\n the range(0, n_classes).\n\ninit : str or ndarray of shape (n_features_a, n_features_b)\n The validated initialization of the linear transformation.\n\nRaises\n-------\nTypeError\n If a parameter is not an instance of the desired type.\n\nValueError\n If a parameter's value violates its legal value range or if the\n combination of two or more given parameters is incompatible." - }, - { - "name": "_initialize", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training labels." - }, - { - "name": "init", - "type": "Union[NDArray, str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The validated initialization of the linear transformation." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialize the transformation.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training samples.\n\ny : array-like of shape (n_samples,)\n The training labels.\n\ninit : str or ndarray of shape (n_features_a, n_features_b)\n The validated initialization of the linear transformation.\n\nReturns\n-------\ntransformation : ndarray of shape (n_components, n_features)\n The initialized linear transformation." - }, - { - "name": "_callback", - "decorators": [], - "parameters": [ - { - "name": "transformation", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The solution computed by the optimizer in this iteration." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Called after each iteration of the optimizer.\n\nParameters\n----------\ntransformation : ndarray of shape (n_components * n_features,)\n The solution computed by the optimizer in this iteration." - }, - { - "name": "_loss_grad_lbfgs", - "decorators": [], - "parameters": [ - { - "name": "transformation", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raveled linear transformation on which to compute loss and evaluate gradient." - }, - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training samples." - }, - { - "name": "same_class_mask", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A mask where ``mask[i, j] == 1`` if ``X[i]`` and ``X[j]`` belong to the same class, and ``0`` otherwise." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the loss and the loss gradient w.r.t. 
``transformation``.\n\nParameters\n----------\ntransformation : ndarray of shape (n_components * n_features,)\n The raveled linear transformation on which to compute loss and\n evaluate gradient.\n\nX : ndarray of shape (n_samples, n_features)\n The training samples.\n\nsame_class_mask : ndarray of shape (n_samples, n_samples)\n A mask where ``mask[i, j] == 1`` if ``X[i]`` and ``X[j]`` belong\n to the same class, and ``0`` otherwise.\n\nReturns\n-------\nloss : float\n The loss computed for the given transformation.\n\ngradient : ndarray of shape (n_components * n_features,)\n The new (flattened) gradient of the loss." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Neighborhood Components Analysis\n\nNeighborhood Component Analysis (NCA) is a machine learning algorithm for\nmetric learning. It learns a linear transformation in a supervised fashion\nto improve the classification accuracy of a stochastic nearest neighbors\nrule in the transformed space.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Preferred dimensionality of the projected space.\n If None it will be set to ``n_features``.\n\ninit : {'auto', 'pca', 'lda', 'identity', 'random'} or ndarray of shape (n_features_a, n_features_b), default='auto'\n Initialization of the linear transformation. Possible options are\n 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape\n (n_features_a, n_features_b).\n\n 'auto'\n Depending on ``n_components``, the most reasonable initialization\n will be chosen. If ``n_components <= n_classes`` we use 'lda', as\n it uses labels information. If not, but\n ``n_components < min(n_features, n_samples)``, we use 'pca', as\n it projects data in meaningful directions (those of higher\n variance). 
Otherwise, we just use 'identity'.\n\n 'pca'\n ``n_components`` principal components of the inputs passed\n to :meth:`fit` will be used to initialize the transformation.\n (See :class:`~sklearn.decomposition.PCA`)\n\n 'lda'\n ``min(n_components, n_classes)`` most discriminative\n components of the inputs passed to :meth:`fit` will be used to\n initialize the transformation. (If ``n_components > n_classes``,\n the rest of the components will be zero.) (See\n :class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)\n\n 'identity'\n If ``n_components`` is strictly smaller than the\n dimensionality of the inputs passed to :meth:`fit`, the identity\n matrix will be truncated to the first ``n_components`` rows.\n\n 'random'\n The initial transformation will be a random array of shape\n `(n_components, n_features)`. Each value is sampled from the\n standard normal distribution.\n\n numpy array\n n_features_b must match the dimensionality of the inputs passed to\n :meth:`fit` and n_features_a must be less than or equal to that.\n If ``n_components`` is not None, n_features_a must match it.\n\nwarm_start : bool, default=False\n If True and :meth:`fit` has been called before, the solution of the\n previous call to :meth:`fit` is used as the initial linear\n transformation (``n_components`` and ``init`` will be ignored).\n\nmax_iter : int, default=50\n Maximum number of iterations in the optimization.\n\ntol : float, default=1e-5\n Convergence tolerance for the optimization.\n\ncallback : callable, default=None\n If not None, this function is called after every iteration of the\n optimizer, taking as arguments the current solution (flattened\n transformation matrix) and the number of iterations. 
This might be\n useful in case one wants to examine or store the transformation\n found after each iteration.\n\nverbose : int, default=0\n If 0, no progress messages will be printed.\n If 1, progress messages will be printed to stdout.\n If > 1, progress messages will be printed and the ``disp``\n parameter of :func:`scipy.optimize.minimize` will be set to\n ``verbose - 2``.\n\nrandom_state : int or numpy.RandomState, default=None\n A pseudo random number generator object or a seed for it if int. If\n ``init='random'``, ``random_state`` is used to initialize the random\n transformation. If ``init='pca'``, ``random_state`` is passed as an\n argument to PCA when initializing the transformation. Pass an int\n for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n The linear transformation learned during fitting.\n\nn_iter_ : int\n Counts the number of iterations performed by the optimizer.\n\nrandom_state_ : numpy.RandomState\n Pseudo random number generator object used during initialization.\n\nExamples\n--------\n>>> from sklearn.neighbors import NeighborhoodComponentsAnalysis\n>>> from sklearn.neighbors import KNeighborsClassifier\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = load_iris(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n... stratify=y, test_size=0.7, random_state=42)\n>>> nca = NeighborhoodComponentsAnalysis(random_state=42)\n>>> nca.fit(X_train, y_train)\nNeighborhoodComponentsAnalysis(...)\n>>> knn = KNeighborsClassifier(n_neighbors=3)\n>>> knn.fit(X_train, y_train)\nKNeighborsClassifier(...)\n>>> print(knn.score(X_test, y_test))\n0.933333...\n>>> knn.fit(nca.transform(X_train), y_train)\nKNeighborsClassifier(...)\n>>> print(knn.score(nca.transform(X_test), y_test))\n0.961904...\n\nReferences\n----------\n.. [1] J. Goldberger, G. 
Hinton, S. Roweis, R. Salakhutdinov.\n \"Neighbourhood Components Analysis\". Advances in Neural Information\n Processing Systems. 17, 513-520, 2005.\n http://www.cs.nyu.edu/~roweis/papers/ncanips.pdf\n\n.. [2] Wikipedia entry on Neighborhood Components Analysis\n https://en.wikipedia.org/wiki/Neighbourhood_components_analysis" - } - ], - "functions": [] - }, - { - "name": "sklearn.neighbors._nearest_centroid", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy import sparse as sp", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from metrics.pairwise import pairwise_distances", - "from preprocessing import LabelEncoder", - "from utils.validation import check_array", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.sparsefuncs import csc_median_axis_0", - "from utils.multiclass import check_classification_targets" - ], - "classes": [ - { - "name": "NearestCentroid", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string or callable, it must be one of the options allowed by metrics.pairwise.pairwise_distances for its metric parameter. The centroids for the samples corresponding to each class is the point from which the sum of the distances (according to the metric) of all samples that belong to that particular class are minimized. If the \"manhattan\" metric is provided, this centroid is the median and for all other metrics, the centroid is now set to be the mean. .. 
versionchanged:: 0.19 ``metric='precomputed'`` was deprecated and now raises an error" - }, - { - "name": "shrink_threshold", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for shrinking centroids to remove features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features. Note that centroid shrinking cannot be used with sparse matrices." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values (integers)" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the NearestCentroid model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n Note that centroid shrinking cannot be used with sparse matrices.\ny : array-like of shape (n_samples,)\n Target values (integers)" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on an array of test vectors X.\n\nThe predicted class C for each sample in X is returned.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n\nNotes\n-----\nIf the metric constructor parameter is 
\"precomputed\", X is assumed to\nbe the distance matrix between the data to be predicted and\n``self.centroids_``." - } - ], - "docstring": "Nearest centroid classifier.\n\nEach class is represented by its centroid, with test samples classified to\nthe class with the nearest centroid.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nmetric : str or callable\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by metrics.pairwise.pairwise_distances for its\n metric parameter.\n The centroids for the samples corresponding to each class is the point\n from which the sum of the distances (according to the metric) of all\n samples that belong to that particular class are minimized.\n If the \"manhattan\" metric is provided, this centroid is the median and\n for all other metrics, the centroid is now set to be the mean.\n\n .. versionchanged:: 0.19\n ``metric='precomputed'`` was deprecated and now raises an error\n\nshrink_threshold : float, default=None\n Threshold for shrinking centroids to remove features.\n\nAttributes\n----------\ncentroids_ : array-like of shape (n_classes, n_features)\n Centroid of each class.\n\nclasses_ : array of shape (n_classes,)\n The unique classes labels.\n\nExamples\n--------\n>>> from sklearn.neighbors import NearestCentroid\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = NearestCentroid()\n>>> clf.fit(X, y)\nNearestCentroid()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n\nSee Also\n--------\nKNeighborsClassifier : Nearest neighbors classifier.\n\nNotes\n-----\nWhen used for text classification with tf-idf vectors, this classifier is\nalso known as the Rocchio classifier.\n\nReferences\n----------\nTibshirani, R., Hastie, T., Narasimhan, B., & Chu, G. (2002). 
Diagnosis of\nmultiple cancer types by shrunken centroids of gene expression. Proceedings\nof the National Academy of Sciences of the United States of America,\n99(10), 6567-6572. The National Academy of Sciences." - } - ], - "functions": [] - }, - { - "name": "sklearn.neighbors._regression", - "imports": [ - "import warnings", - "import numpy as np", - "from _base import _get_weights", - "from _base import _check_weights", - "from _base import NeighborsBase", - "from _base import KNeighborsMixin", - "from _base import RadiusNeighborsMixin", - "from base import RegressorMixin", - "from utils import check_array", - "from utils.validation import _deprecate_positional_args", - "from utils.deprecation import deprecated" - ], - "classes": [ - { - "name": "KNeighborsRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to use by default for :meth:`kneighbors` queries." - }, - { - "name": "weights", - "type": "Literal['uniform', 'distance']", - "hasDefault": true, - "default": "'uniform'", - "limitation": null, - "ignored": false, - "docstring": "weight function used in prediction. Possible values: - 'uniform' : uniform weights. All points in each neighborhood are weighted equally. - 'distance' : weight points by the inverse of their distance. in this case, closer neighbors of a query point will have a greater influence than neighbors which are further away. - [callable] : a user-defined function which accepts an array of distances, and returns an array of the same shape containing the weights. Uniform weights are used by default." 
- }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "the distance metric to use for the tree. The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean metric. See the documentation of :class:`DistanceMetric` for a list of available metrics. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square during fit. X may be a :term:`sparse graph`, in which case only \"nonzero\" elements may be considered neighbors." 
- }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. Doesn't affect :meth:`fit` method." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the k-nearest neighbors regressor from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nReturns\n-------\nself : KNeighborsRegressor\n The fitted k-nearest neighbors regressor." 
- }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the target for the provided data\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=int\n Target values." - } - ], - "docstring": "Regression based on k-nearest neighbors.\n\nThe target is predicted by local interpolation of the targets\nassociated of the nearest neighbors in the training set.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.9\n\nParameters\n----------\nn_neighbors : int, default=5\n Number of neighbors to use by default for :meth:`kneighbors` queries.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. 
All points in each neighborhood\n are weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\n Uniform weights are used by default.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\np : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. 
X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n Doesn't affect :meth:`fit` method.\n\nAttributes\n----------\neffective_metric_ : str or callable\n The distance metric to use. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import KNeighborsRegressor\n>>> neigh = KNeighborsRegressor(n_neighbors=2)\n>>> neigh.fit(X, y)\nKNeighborsRegressor(...)\n>>> print(neigh.predict([[1.5]]))\n[0.5]\n\nSee Also\n--------\nNearestNeighbors\nRadiusNeighborsRegressor\nKNeighborsClassifier\nRadiusNeighborsClassifier\n\nNotes\n-----\nSee :ref:`Nearest Neighbors ` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n.. 
warning::\n\n Regarding the Nearest Neighbors algorithms, if it is found that two\n neighbors, neighbor `k+1` and `k`, have identical distances but\n different labels, the results will depend on the ordering of the\n training data.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm" - }, - { - "name": "RadiusNeighborsRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "radius", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Range of parameter space to use by default for :meth:`radius_neighbors` queries." - }, - { - "name": "weights", - "type": "Literal['uniform', 'distance']", - "hasDefault": true, - "default": "'uniform'", - "limitation": null, - "ignored": false, - "docstring": "weight function used in prediction. Possible values: - 'uniform' : uniform weights. All points in each neighborhood are weighted equally. - 'distance' : weight points by the inverse of their distance. in this case, closer neighbors of a query point will have a greater influence than neighbors which are further away. - [callable] : a user-defined function which accepts an array of distances, and returns an array of the same shape containing the weights. Uniform weights are used by default." - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force." 
- }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "the distance metric to use for the tree. The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean metric. See the documentation of :class:`DistanceMetric` for a list of available metrics. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square during fit. X may be a :term:`sparse graph`, in which case only \"nonzero\" elements may be considered neighbors." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the radius neighbors regressor from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nReturns\n-------\nself : RadiusNeighborsRegressor\n The fitted radius neighbors regressor." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the target for the provided data\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=double\n Target values." - } - ], - "docstring": "Regression based on neighbors within a fixed radius.\n\nThe target is predicted by local interpolation of the targets\nassociated of the nearest neighbors in the training set.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.9\n\nParameters\n----------\nradius : float, default=1.0\n Range of parameter space to use by default for :meth:`radius_neighbors`\n queries.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. All points in each neighborhood\n are weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\n Uniform weights are used by default.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\np : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. 
See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\neffective_metric_ : str or callable\n The distance metric to use. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import RadiusNeighborsRegressor\n>>> neigh = RadiusNeighborsRegressor(radius=1.0)\n>>> neigh.fit(X, y)\nRadiusNeighborsRegressor(...)\n>>> print(neigh.predict([[1.5]]))\n[0.5]\n\nSee Also\n--------\nNearestNeighbors\nKNeighborsRegressor\nKNeighborsClassifier\nRadiusNeighborsClassifier\n\nNotes\n-----\nSee :ref:`Nearest Neighbors ` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm" - } - ], - "functions": [] - }, - { - "name": "sklearn.neighbors._unsupervised", - "imports": [ - "from _base import NeighborsBase", - "from _base import KNeighborsMixin", - "from _base import 
RadiusNeighborsMixin", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "NearestNeighbors", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to use by default for :meth:`kneighbors` queries." - }, - { - "name": "radius", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Range of parameter space to use by default for :meth:`radius_neighbors` queries." - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "the distance metric to use for the tree. The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean metric. 
See the documentation of :class:`DistanceMetric` for a list of available metrics. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square during fit. X may be a :term:`sparse graph`, in which case only \"nonzero\" elements may be considered neighbors." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Parameter for the Minkowski metric from sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the nearest neighbors estimator from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : NearestNeighbors\n The fitted nearest neighbors estimator." - } - ], - "docstring": "Unsupervised learner for implementing neighbor searches.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.9\n\nParameters\n----------\nn_neighbors : int, default=5\n Number of neighbors to use by default for :meth:`kneighbors` queries.\n\nradius : float, default=1.0\n Range of parameter space to use by default for :meth:`radius_neighbors`\n queries.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nmetric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. 
X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\np : int, default=2\n Parameter for the Minkowski metric from\n sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\neffective_metric_ : str\n Metric used to compute distances to neighbors.\n\neffective_metric_params_ : dict\n Parameters for the metric used to compute distances to neighbors.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.neighbors import NearestNeighbors\n>>> samples = [[0, 0, 2], [1, 0, 0], [0, 0, 1]]\n\n>>> neigh = NearestNeighbors(n_neighbors=2, radius=0.4)\n>>> neigh.fit(samples)\nNearestNeighbors(...)\n\n>>> neigh.kneighbors([[0, 0, 1.3]], 2, return_distance=False)\narray([[2, 0]]...)\n\n>>> nbrs = neigh.radius_neighbors(\n... [[0, 0, 1.3]], 0.4, return_distance=False\n... 
)\n>>> np.asarray(nbrs[0][0])\narray(2)\n\nSee Also\n--------\nKNeighborsClassifier\nRadiusNeighborsClassifier\nKNeighborsRegressor\nRadiusNeighborsRegressor\nBallTree\n\nNotes\n-----\nSee :ref:`Nearest Neighbors ` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm" - } - ], - "functions": [] - }, - { - "name": "sklearn.neighbors", - "imports": [ - "from _ball_tree import BallTree", - "from _kd_tree import KDTree", - "from _dist_metrics import DistanceMetric", - "from _graph import kneighbors_graph", - "from _graph import radius_neighbors_graph", - "from _graph import KNeighborsTransformer", - "from _graph import RadiusNeighborsTransformer", - "from _unsupervised import NearestNeighbors", - "from _classification import KNeighborsClassifier", - "from _classification import RadiusNeighborsClassifier", - "from _regression import KNeighborsRegressor", - "from _regression import RadiusNeighborsRegressor", - "from _nearest_centroid import NearestCentroid", - "from _kde import KernelDensity", - "from _lof import LocalOutlierFactor", - "from _nca import NeighborhoodComponentsAnalysis", - "from _base import VALID_METRICS", - "from _base import VALID_METRICS_SPARSE" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.neighbors.tests.test_ball_tree", - "imports": [ - "import itertools", - "import numpy as np", - "import pytest", - "from numpy.testing import assert_array_almost_equal", - "from sklearn.neighbors._ball_tree import BallTree", - "from sklearn.neighbors import DistanceMetric", - "from sklearn.utils import check_random_state", - "from sklearn.utils.validation import check_array", - "from sklearn.utils._testing import _convert_container" - ], - "classes": [], - "functions": [ - { - "name": "brute_force_neighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_ball_tree_query_metrics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_query_haversine", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_array_object_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that we do not accept object dtype array." - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_dist_metrics", - "imports": [ - "import itertools", - "import pickle", - "import numpy as np", - "from numpy.testing import assert_array_almost_equal", - "import pytest", - "from scipy.spatial.distance import cdist", - "from sklearn.neighbors import DistanceMetric", - "from sklearn.neighbors import BallTree", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils.fixes import sp_version", - "from sklearn.utils.fixes import parse_version" - ], - "classes": [], - "functions": [ - { - "name": "dist_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cdist", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cdist_bool_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cdist", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cdist_bool", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pdist", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_pdist_bool_metrics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pdist", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pdist_bool", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle_bool_metrics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_haversine_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pyfunc_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bad_pyfunc_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_data_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_graph", - "imports": [ - "import numpy as np", - "from sklearn.metrics import euclidean_distances", - "from sklearn.neighbors import KNeighborsTransformer", - "from sklearn.neighbors import RadiusNeighborsTransformer", - "from sklearn.neighbors._base import _is_sorted_by_data" - ], - "classes": [], - "functions": [ - { - "name": "test_transformer_result", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "_has_explicit_diagonal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return True if the diagonal is explicitly stored" - }, - { - "name": "test_explicit_diagonal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_kde", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_raises", - "from sklearn.neighbors import KernelDensity", - "from sklearn.neighbors import KDTree", - "from sklearn.neighbors import NearestNeighbors", - "from sklearn.neighbors._ball_tree import kernel_norm", - "from sklearn.pipeline import make_pipeline", - "from sklearn.datasets import make_blobs", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.exceptions import NotFittedError", - "import joblib" - ], - "classes": [], - "functions": [ - { - "name": "compute_kernel_slow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_density", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_density_sampling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kde_algorithm_metric_choice", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kde_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_kde_badargs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kde_pipeline_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kde_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_is_fitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_kd_tree", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.neighbors._kd_tree import KDTree" - ], - "classes": [], - "functions": [ - { - "name": "test_array_object_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that we do not accept object dtype array." 
- } - ] - }, - { - "name": "sklearn.neighbors.tests.test_lof", - "imports": [ - "from math import sqrt", - "import numpy as np", - "from sklearn import neighbors", - "import pytest", - "from numpy.testing import assert_array_equal", - "from sklearn import metrics", - "from sklearn.metrics import roc_auc_score", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils.estimator_checks import check_estimator", - "from sklearn.utils.estimator_checks import check_outlier_corruption", - "from sklearn.datasets import load_iris" - ], - "classes": [], - "functions": [ - { - "name": "test_lof", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lof_performance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lof_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lof_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Tests LOF with a distance matrix." 
- }, - { - "name": "test_n_neighbors_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_contamination", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_novelty_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_novelty_training_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hasattr_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_novelty_true_common_tests", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predicted_outlier_number", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_nca", - "imports": [ - "import pytest", - "import re", - "import numpy as np", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_array_almost_equal", - "from scipy.optimize import check_grad", - "from sklearn import clone", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_blobs", - "from 
sklearn.neighbors import NeighborhoodComponentsAnalysis", - "from sklearn.metrics import pairwise_distances" - ], - "classes": [], - "functions": [ - { - "name": "test_simple_example", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test on a simple example.\n\nPuts four points in the input space where the opposite labels points are\nnext to each other. After transform the samples from the same class\nshould be next to each other." - }, - { - "name": "test_toy_example_collapse_points", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test on a toy example of three points that should collapse\n\nWe build a simple example: two points from the same class and a point from\na different class in the middle of them. On this simple example, the new\n(transformed) points should all collapse into one single point. Indeed, the\nobjective is 2/(1 + exp(d/2)), with d the euclidean distance between the\ntwo samples from the same class. This is maximized for d=0 (because d>=0),\nwith an objective equal to 1 (loss=-1.)." - }, - { - "name": "test_finite_differences", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test gradient of loss function\n\nAssert that the gradient is almost equal to its finite differences\napproximation." 
- }, - { - "name": "test_params_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transformation_dimensions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_transformation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_auto_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_effectiveness", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_singleton_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callback", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_expected_transformation_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that the 
transformation has the expected shape." - }, - { - "name": "test_convergence_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parameters_valid_types", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_nearest_centroid", - "imports": [ - "import numpy as np", - "from scipy import sparse as sp", - "from numpy.testing import assert_array_equal", - "from sklearn.neighbors import NearestCentroid", - "from sklearn import datasets", - "from sklearn.utils._testing import assert_raises", - "import pickle" - ], - "classes": [], - "functions": [ - { - "name": "test_classification_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iris_shrinkage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shrinkage_correct", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shrinkage_threshold_decoded_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_translated_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_manhattan_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_features_zero_var", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_neighbors", - "imports": [ - "from itertools import product", - "import pytest", - "import numpy as np", - "from scipy.sparse import bsr_matrix", - "from scipy.sparse import coo_matrix", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import dok_matrix", - "from scipy.sparse import lil_matrix", - "from scipy.sparse import issparse", - "from sklearn import metrics", - "from sklearn import neighbors", - "from sklearn import datasets", - "from sklearn.base import clone", - "from sklearn.exceptions import DataConversionWarning", - "from sklearn.exceptions import EfficiencyWarning", - "from sklearn.exceptions import NotFittedError", - "from sklearn.metrics.pairwise import pairwise_distances", - "from sklearn.model_selection import cross_val_score", - "from sklearn.model_selection import train_test_split", - "from sklearn.neighbors import VALID_METRICS_SPARSE", - "from sklearn.neighbors import VALID_METRICS", - "from sklearn.neighbors._base import _is_sorted_by_data", - "from sklearn.neighbors._base import _check_precomputed", - "from sklearn.pipeline import make_pipeline", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils.validation import 
check_random_state", - "from sklearn.utils.fixes import sp_version", - "from sklearn.utils.fixes import parse_version", - "import joblib" - ], - "classes": [], - "functions": [ - { - "name": "_weight_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Weight function to replace lambda d: d ** -2.\nThe lambda function is not valid because:\nif d==0 then 0^-2 is not valid. " - }, - { - "name": "test_unsupervised_kneighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unsupervised_inputs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_neighbors_datatype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_not_fitted_error_gets_raised", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Tests unsupervised NearestNeighbors with a distance matrix." 
- }, - { - "name": "test_precomputed_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precomputed_sparse_knn", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precomputed_sparse_radius", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_is_sorted_by_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precomputed_sparse_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precomputed_cross_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unsupervised_radius_neighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_classifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_classifier_float_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_classifier_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_classifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_radius_neighbors_classifier_when_no_neighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_classifier_outlier_labeling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_classifier_zero_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbors_regressors_zero_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_boundary_handling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test whether points lying on boundary are handled consistently\n\nAlso ensures that even with only one query point, an object array\nis returned rather than a 2d array." 
- }, - { - "name": "test_radius_neighbors_returns_array_of_objects", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_sort_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_RadiusNeighborsClassifier_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_classifier_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_KNeighborsClassifier_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_KNeighborsRegressor_multioutput_uniform_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_regressor_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_RadiusNeighborsRegressor_multioutput_with_uniform_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_RadiusNeighborsRegressor_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_regressor_sparse", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbors_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbors_digits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_graph_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_graph_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbors_badargs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbors_metrics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_valid_brute_metric_for_auto_algorithm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_metric_params_interface", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_sparse_ball_kd_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_non_euclidean_kneighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_object_arrays", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_k_and_radius_neighbors_train_is_not_query", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_k_and_radius_neighbors_X_None", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_k_and_radius_neighbors_duplicates", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_include_self_neighbors_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_same_knn_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_same_radius_neighbors_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_forcing_backend", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dtype_convert", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_metric_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_boolean_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_predict_proba", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_with_nearest_neighbors_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_auto_algorithm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_neighbors_pipeline", - "imports": [ - "import numpy as np", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.cluster.tests.common import generate_clustered_data", - "from sklearn.datasets import make_blobs", - "from sklearn.pipeline import make_pipeline", - "from sklearn.base import clone", - "from sklearn.neighbors import KNeighborsTransformer", - "from sklearn.neighbors import RadiusNeighborsTransformer", - "from sklearn.cluster import DBSCAN", - "from sklearn.cluster import SpectralClustering", - "from sklearn.neighbors import KNeighborsRegressor", - "from sklearn.neighbors import RadiusNeighborsRegressor", - "from sklearn.neighbors import LocalOutlierFactor", - "from sklearn.manifold import SpectralEmbedding", - "from sklearn.manifold import Isomap", - "from sklearn.manifold import TSNE" - ], - "classes": [], - "functions": [ - { - "name": "test_spectral_clustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isomap", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tsne", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lof_novelty_false", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lof_novelty_true", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_neighbors_tree", - "imports": [ - "import pickle", - "import itertools", - "import numpy as np", - "import pytest", - "from sklearn.neighbors import DistanceMetric", - "from sklearn.neighbors._ball_tree import BallTree", - "from sklearn.neighbors._ball_tree import kernel_norm", - "from sklearn.neighbors._ball_tree import DTYPE", - "from sklearn.neighbors._ball_tree import ITYPE", - "from sklearn.neighbors._ball_tree import NeighborsHeap as NeighborsHeapBT", - "from sklearn.neighbors._ball_tree import simultaneous_sort as simultaneous_sort_bt", - "from sklearn.neighbors._ball_tree import nodeheap_sort as nodeheap_sort_bt", - "from sklearn.neighbors._kd_tree import KDTree", - "from sklearn.neighbors._kd_tree import NeighborsHeap as NeighborsHeapKDT", - "from sklearn.neighbors._kd_tree import simultaneous_sort as simultaneous_sort_kdt", - "from sklearn.neighbors._kd_tree import nodeheap_sort as nodeheap_sort_kdt", - "from sklearn.utils import check_random_state", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_allclose", - "from scipy.stats import gaussian_kde" - ], - "classes": [], - "functions": [ - { - "name": "dist_func", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "compute_kernel_slow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "brute_force_neighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_density", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbor_tree_query_radius", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbor_tree_query_radius_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbor_tree_two_point", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbors_heap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_node_heap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_simultaneous_sort", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_kde", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nn_tree_query", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_quad_tree", - "imports": [ - 
"import pickle", - "import numpy as np", - "import pytest", - "from sklearn.neighbors._quad_tree import _QuadTree", - "from sklearn.utils import check_random_state" - ], - "classes": [], - "functions": [ - { - "name": "test_quadtree_boundary_computation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quadtree_similar_point", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quad_tree_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_qt_insert_duplicate", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_summarize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.neural_network._base", - "imports": [ - "import numpy as np", - "from scipy.special import expit as logistic_sigmoid", - "from scipy.special import xlogy" - ], - "classes": [], - "functions": [ - { - "name": "inplace_identity", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data, where n_samples is the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Simply leave the input array unchanged.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Data, where n_samples is the number of samples\n and n_features is the number of features." 
- }, - { - "name": "inplace_logistic", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the logistic function inplace.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data." - }, - { - "name": "inplace_tanh", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the hyperbolic tan function inplace.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data." - }, - { - "name": "inplace_relu", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the rectified linear unit function inplace.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data." - }, - { - "name": "inplace_softmax", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the K-way softmax function inplace.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data." 
- }, - { - "name": "inplace_identity_derivative", - "decorators": [], - "parameters": [ - { - "name": "Z", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data which was output from the identity activation function during the forward pass." - }, - { - "name": "delta", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The backpropagated error signal to be modified inplace." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the derivative of the identity function: do nothing.\n\nParameters\n----------\nZ : {array-like, sparse matrix}, shape (n_samples, n_features)\n The data which was output from the identity activation function during\n the forward pass.\n\ndelta : {array-like}, shape (n_samples, n_features)\n The backpropagated error signal to be modified inplace." - }, - { - "name": "inplace_logistic_derivative", - "decorators": [], - "parameters": [ - { - "name": "Z", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data which was output from the logistic activation function during the forward pass." - }, - { - "name": "delta", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The backpropagated error signal to be modified inplace." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the derivative of the logistic sigmoid function.\n\nIt exploits the fact that the derivative is a simple function of the output\nvalue from logistic function.\n\nParameters\n----------\nZ : {array-like, sparse matrix}, shape (n_samples, n_features)\n The data which was output from the logistic activation function during\n the forward pass.\n\ndelta : {array-like}, shape (n_samples, n_features)\n The backpropagated error signal to be modified inplace." - }, - { - "name": "inplace_tanh_derivative", - "decorators": [], - "parameters": [ - { - "name": "Z", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data which was output from the hyperbolic tangent activation function during the forward pass." - }, - { - "name": "delta", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The backpropagated error signal to be modified inplace." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the derivative of the hyperbolic tanh function.\n\nIt exploits the fact that the derivative is a simple function of the output\nvalue from hyperbolic tangent.\n\nParameters\n----------\nZ : {array-like, sparse matrix}, shape (n_samples, n_features)\n The data which was output from the hyperbolic tangent activation\n function during the forward pass.\n\ndelta : {array-like}, shape (n_samples, n_features)\n The backpropagated error signal to be modified inplace." - }, - { - "name": "inplace_relu_derivative", - "decorators": [], - "parameters": [ - { - "name": "Z", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data which was output from the rectified linear units activation function during the forward pass." 
- }, - { - "name": "delta", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The backpropagated error signal to be modified inplace." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the derivative of the relu function.\n\nIt exploits the fact that the derivative is a simple function of the output\nvalue from rectified linear units activation function.\n\nParameters\n----------\nZ : {array-like, sparse matrix}, shape (n_samples, n_features)\n The data which was output from the rectified linear units activation\n function during the forward pass.\n\ndelta : {array-like}, shape (n_samples, n_features)\n The backpropagated error signal to be modified inplace." - }, - { - "name": "squared_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) values." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted values, as returned by a regression estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the squared loss for regression.\n\nParameters\n----------\ny_true : array-like or label indicator matrix\n Ground truth (correct) values.\n\ny_pred : array-like or label indicator matrix\n Predicted values, as returned by a regression estimator.\n\nReturns\n-------\nloss : float\n The degree to which the samples are correctly predicted." - }, - { - "name": "log_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) labels." 
- }, - { - "name": "y_prob", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted probabilities, as returned by a classifier's predict_proba method." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Logistic loss for classification.\n\nParameters\n----------\ny_true : array-like or label indicator matrix\n Ground truth (correct) labels.\n\ny_prob : array-like of float, shape = (n_samples, n_classes)\n Predicted probabilities, as returned by a classifier's\n predict_proba method.\n\nReturns\n-------\nloss : float\n The degree to which the samples are correctly predicted." - }, - { - "name": "binary_log_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) labels." - }, - { - "name": "y_prob", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted probabilities, as returned by a classifier's predict_proba method." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute binary logistic loss for classification.\n\nThis is identical to log_loss in binary classification case,\nbut is kept for its use in multilabel case.\n\nParameters\n----------\ny_true : array-like or label indicator matrix\n Ground truth (correct) labels.\n\ny_prob : array-like of float, shape = (n_samples, 1)\n Predicted probabilities, as returned by a classifier's\n predict_proba method.\n\nReturns\n-------\nloss : float\n The degree to which the samples are correctly predicted." 
- } - ] - }, - { - "name": "sklearn.neural_network._multilayer_perceptron", - "imports": [ - "import numpy as np", - "from abc import ABCMeta", - "from abc import abstractmethod", - "import warnings", - "import scipy.optimize", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import is_classifier", - "from _base import ACTIVATIONS", - "from _base import DERIVATIVES", - "from _base import LOSS_FUNCTIONS", - "from _stochastic_optimizers import SGDOptimizer", - "from _stochastic_optimizers import AdamOptimizer", - "from model_selection import train_test_split", - "from preprocessing import LabelBinarizer", - "from utils import gen_batches", - "from utils import check_random_state", - "from utils import shuffle", - "from utils import _safe_indexing", - "from utils import column_or_1d", - "from exceptions import ConvergenceWarning", - "from utils.extmath import safe_sparse_dot", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import _check_partial_fit_first_call", - "from utils.multiclass import unique_labels", - "from utils.multiclass import type_of_target", - "from utils.optimize import _check_optimize_result" - ], - "classes": [ - { - "name": "BaseMultilayerPerceptron", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_unpack", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Extract the coefficients and intercepts from packed_parameters." 
- }, - { - "name": "_forward_pass", - "decorators": [], - "parameters": [ - { - "name": "activations", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element of the list holds the values of the ith layer." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform a forward pass on the network by computing the values\nof the neurons in the hidden layers and the output layer.\n\nParameters\n----------\nactivations : list, length = n_layers - 1\n The ith element of the list holds the values of the ith layer." - }, - { - "name": "_forward_pass_fast", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the trained model\n\nThis is the same as _forward_pass but does not record the activations\nof all layers and only returns the last layer's activation.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The decision function of the samples for each class in the model." - }, - { - "name": "_compute_loss_grad", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the gradient of loss with respect to coefs and intercept for\nspecified layer.\n\nThis function does backpropagation for the specified one layer." - }, - { - "name": "_loss_grad_lbfgs", - "decorators": [], - "parameters": [ - { - "name": "packed_coef_inter", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A vector comprising the flattened coefficients and intercepts." 
- }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." - }, - { - "name": "activations", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element of the list holds the values of the ith layer." - }, - { - "name": "deltas", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element of the list holds the difference between the activations of the i + 1 layer and the backpropagated error. More specifically, deltas are gradients of loss with respect to z in each layer, where z = wx + b is the value of a particular layer before passing through the activation function" - }, - { - "name": "coef_grads", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element contains the amount of change used to update the coefficient parameters of the ith layer in an iteration." - }, - { - "name": "intercept_grads", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element contains the amount of change used to update the intercept parameters of the ith layer in an iteration." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the MLP loss function and its corresponding derivatives\nwith respect to the different parameters given in the initialization.\n\nReturned gradients are packed in a single vector so it can be used\nin lbfgs\n\nParameters\n----------\npacked_coef_inter : ndarray\n A vector comprising the flattened coefficients and intercepts.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\ny : ndarray of shape (n_samples,)\n The target values.\n\nactivations : list, length = n_layers - 1\n The ith element of the list holds the values of the ith layer.\n\ndeltas : list, length = n_layers - 1\n The ith element of the list holds the difference between the\n activations of the i + 1 layer and the backpropagated error.\n More specifically, deltas are gradients of loss with respect to z\n in each layer, where z = wx + b is the value of a particular layer\n before passing through the activation function\n\ncoef_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n coefficient parameters of the ith layer in an iteration.\n\nintercept_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n intercept parameters of the ith layer in an iteration.\n\nReturns\n-------\nloss : float\ngrad : array-like, shape (number of nodes of all layers,)" - }, - { - "name": "_backprop", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." 
- }, - { - "name": "activations", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element of the list holds the values of the ith layer." - }, - { - "name": "deltas", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element of the list holds the difference between the activations of the i + 1 layer and the backpropagated error. More specifically, deltas are gradients of loss with respect to z in each layer, where z = wx + b is the value of a particular layer before passing through the activation function" - }, - { - "name": "coef_grads", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element contains the amount of change used to update the coefficient parameters of the ith layer in an iteration." - }, - { - "name": "intercept_grads", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element contains the amount of change used to update the intercept parameters of the ith layer in an iteration." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the MLP loss function and its corresponding derivatives\nwith respect to each parameter: weights and bias vectors.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\ny : ndarray of shape (n_samples,)\n The target values.\n\nactivations : list, length = n_layers - 1\n The ith element of the list holds the values of the ith layer.\n\ndeltas : list, length = n_layers - 1\n The ith element of the list holds the difference between the\n activations of the i + 1 layer and the backpropagated error.\n More specifically, deltas are gradients of loss with respect to z\n in each layer, where z = wx + b is the value of a particular layer\n before passing through the activation function\n\ncoef_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n coefficient parameters of the ith layer in an iteration.\n\nintercept_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n intercept parameters of the ith layer in an iteration.\n\nReturns\n-------\nloss : float\ncoef_grads : list, length = n_layers - 1\nintercept_grads : list, length = n_layers - 1" - }, - { - "name": "_initialize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_init_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_hyperparameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_lbfgs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "_fit_stochastic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_update_no_improvement_count", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels in classification, real numbers in regression)." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to data matrix X and target(s) y.\n\nParameters\n----------\nX : ndarray or sparse matrix of shape (n_samples, n_features)\n The input data.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels in classification, real numbers in\n regression).\n\nReturns\n-------\nself : returns a trained MLP model." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Update the model with a single iteration over the given data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\ny : ndarray of shape (n_samples,)\n The target values.\n\nReturns\n-------\nself : returns a trained MLP model." 
- }, - { - "name": "_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for MLP classification and regression.\n\nWarning: This class should not be used directly.\nUse derived classes instead.\n\n.. versionadded:: 0.18" - }, - { - "name": "MLPClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "hidden_layer_sizes", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element represents the number of neurons in the ith hidden layer." - }, - { - "name": "activation", - "type": "Literal['identity', 'logistic', 'tanh', 'relu']", - "hasDefault": true, - "default": "'relu'", - "limitation": null, - "ignored": false, - "docstring": "Activation function for the hidden layer. - 'identity', no-op activation, useful to implement linear bottleneck, returns f(x) = x - 'logistic', the logistic sigmoid function, returns f(x) = 1 / (1 + exp(-x)). - 'tanh', the hyperbolic tan function, returns f(x) = tanh(x). - 'relu', the rectified linear unit function, returns f(x) = max(0, x)" - }, - { - "name": "solver", - "type": "Literal['lbfgs', 'sgd', 'adam']", - "hasDefault": true, - "default": "'adam'", - "limitation": null, - "ignored": false, - "docstring": "The solver for weight optimization. - 'lbfgs' is an optimizer in the family of quasi-Newton methods. - 'sgd' refers to stochastic gradient descent. - 'adam' refers to a stochastic gradient-based optimizer proposed by Kingma, Diederik, and Jimmy Ba Note: The default solver 'adam' works pretty well on relatively large datasets (with thousands of training samples or more) in terms of both training time and validation score. For small datasets, however, 'lbfgs' can converge faster and perform better." 
- }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L2 penalty (regularization term) parameter." - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Size of minibatches for stochastic optimizers. If the solver is 'lbfgs', the classifier will not use minibatch. When set to \"auto\", `batch_size=min(200, n_samples)`" - }, - { - "name": "learning_rate", - "type": "Literal['constant', 'invscaling', 'adaptive']", - "hasDefault": true, - "default": "'constant'", - "limitation": null, - "ignored": false, - "docstring": "Learning rate schedule for weight updates. - 'constant' is a constant learning rate given by 'learning_rate_init'. - 'invscaling' gradually decreases the learning rate at each time step 't' using an inverse scaling exponent of 'power_t'. effective_learning_rate = learning_rate_init / pow(t, power_t) - 'adaptive' keeps the learning rate constant to 'learning_rate_init' as long as training loss keeps decreasing. Each time two consecutive epochs fail to decrease training loss by at least tol, or fail to increase validation score by at least tol if 'early_stopping' is on, the current learning rate is divided by 5. Only used when ``solver='sgd'``." - }, - { - "name": "learning_rate_init", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The initial learning rate used. It controls the step-size in updating the weights. Only used when solver='sgd' or 'adam'." - }, - { - "name": "power_t", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The exponent for inverse scaling learning rate. It is used in updating effective learning rate when the learning_rate is set to 'invscaling'. Only used when solver='sgd'." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations. The solver iterates until convergence (determined by 'tol') or this number of iterations. For stochastic solvers ('sgd', 'adam'), note that this determines the number of epochs (how many times each data point will be used), not the number of gradient steps." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle samples in each iteration. Only used when solver='sgd' or 'adam'." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for weights and bias initialization, train-test split if early stopping is used, and batch sampling when solver='sgd' or 'adam'. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for the optimization. When the loss or score is not improving by at least ``tol`` for ``n_iter_no_change`` consecutive iterations, unless ``learning_rate`` is set to 'adaptive', convergence is considered to be reached and training stops." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to print progress messages to stdout." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `." 
- }, - { - "name": "momentum", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Momentum for gradient descent update. Should be between 0 and 1. Only used when solver='sgd'." - }, - { - "name": "nesterovs_momentum", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use Nesterov's momentum. Only used when solver='sgd' and momentum > 0." - }, - { - "name": "early_stopping", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use early stopping to terminate training when validation score is not improving. If set to true, it will automatically set aside 10% of training data as validation and terminate training when validation score is not improving by at least tol for ``n_iter_no_change`` consecutive epochs. The split is stratified, except in a multilabel setting. Only effective when solver='sgd' or 'adam'" - }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True" - }, - { - "name": "beta_1", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Exponential decay rate for estimates of first moment vector in adam, should be in [0, 1). Only used when solver='adam'" - }, - { - "name": "beta_2", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Exponential decay rate for estimates of second moment vector in adam, should be in [0, 1). 
Only used when solver='adam'" - }, - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "1e-8", - "limitation": null, - "ignored": false, - "docstring": "Value for numerical stability in adam. Only used when solver='adam'" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of epochs to not meet ``tol`` improvement. Only effective when solver='sgd' or 'adam' .. versionadded:: 0.20" - }, - { - "name": "max_fun", - "type": "int", - "hasDefault": true, - "default": "15000", - "limitation": null, - "ignored": false, - "docstring": "Only used when solver='lbfgs'. Maximum number of loss function calls. The solver iterates until convergence (determined by 'tol'), number of iterations reaches max_iter, or this number of loss function calls. Note that number of loss function calls will be greater than or equal to the number of iterations for the `MLPClassifier`. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the multi-layer perceptron classifier\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\ny : ndarray, shape (n_samples,) or (n_samples, n_classes)\n The predicted classes." 
- }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." - }, - { - "name": "classes", - "type": "Array", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Classes across all calls to partial_fit. Can be obtained via `np.unique(y_all)`, where y_all is the target vector of the entire dataset. This argument is required for the first call to partial_fit and can be omitted in the subsequent calls. Note that y doesn't need to contain all labels in `classes`." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Update the model with a single iteration over the given data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\ny : array-like of shape (n_samples,)\n The target values.\n\nclasses : array of shape (n_classes,), default=None\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\nReturns\n-------\nself : returns a trained MLP model." - }, - { - "name": "_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the log of probability estimates.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\nlog_y_prob : ndarray of shape (n_samples, n_classes)\n The predicted log-probability of the sample for each class\n in the model, where classes are ordered as they are in\n `self.classes_`. Equivalent to log(predict_proba(X))" - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Probability estimates.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\ny_prob : ndarray of shape (n_samples, n_classes)\n The predicted probability of the sample for each class in the\n model, where classes are ordered as they are in `self.classes_`." - } - ], - "docstring": "Multi-layer Perceptron classifier.\n\nThis model optimizes the log-loss function using LBFGS or stochastic\ngradient descent.\n\n.. 
versionadded:: 0.18\n\nParameters\n----------\nhidden_layer_sizes : tuple, length = n_layers - 2, default=(100,)\n The ith element represents the number of neurons in the ith\n hidden layer.\n\nactivation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'\n Activation function for the hidden layer.\n\n - 'identity', no-op activation, useful to implement linear bottleneck,\n returns f(x) = x\n\n - 'logistic', the logistic sigmoid function,\n returns f(x) = 1 / (1 + exp(-x)).\n\n - 'tanh', the hyperbolic tan function,\n returns f(x) = tanh(x).\n\n - 'relu', the rectified linear unit function,\n returns f(x) = max(0, x)\n\nsolver : {'lbfgs', 'sgd', 'adam'}, default='adam'\n The solver for weight optimization.\n\n - 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n - 'sgd' refers to stochastic gradient descent.\n\n - 'adam' refers to a stochastic gradient-based optimizer proposed\n by Kingma, Diederik, and Jimmy Ba\n\n Note: The default solver 'adam' works pretty well on relatively\n large datasets (with thousands of training samples or more) in terms of\n both training time and validation score.\n For small datasets, however, 'lbfgs' can converge faster and perform\n better.\n\nalpha : float, default=0.0001\n L2 penalty (regularization term) parameter.\n\nbatch_size : int, default='auto'\n Size of minibatches for stochastic optimizers.\n If the solver is 'lbfgs', the classifier will not use minibatch.\n When set to \"auto\", `batch_size=min(200, n_samples)`\n\nlearning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'\n Learning rate schedule for weight updates.\n\n - 'constant' is a constant learning rate given by\n 'learning_rate_init'.\n\n - 'invscaling' gradually decreases the learning rate at each\n time step 't' using an inverse scaling exponent of 'power_t'.\n effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n - 'adaptive' keeps the learning rate constant to\n 'learning_rate_init' as long as training loss 
keeps decreasing.\n Each time two consecutive epochs fail to decrease training loss by at\n least tol, or fail to increase validation score by at least tol if\n 'early_stopping' is on, the current learning rate is divided by 5.\n\n Only used when ``solver='sgd'``.\n\nlearning_rate_init : double, default=0.001\n The initial learning rate used. It controls the step-size\n in updating the weights. Only used when solver='sgd' or 'adam'.\n\npower_t : double, default=0.5\n The exponent for inverse scaling learning rate.\n It is used in updating effective learning rate when the learning_rate\n is set to 'invscaling'. Only used when solver='sgd'.\n\nmax_iter : int, default=200\n Maximum number of iterations. The solver iterates until convergence\n (determined by 'tol') or this number of iterations. For stochastic\n solvers ('sgd', 'adam'), note that this determines the number of epochs\n (how many times each data point will be used), not the number of\n gradient steps.\n\nshuffle : bool, default=True\n Whether to shuffle samples in each iteration. Only used when\n solver='sgd' or 'adam'.\n\nrandom_state : int, RandomState instance, default=None\n Determines random number generation for weights and bias\n initialization, train-test split if early stopping is used, and batch\n sampling when solver='sgd' or 'adam'.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\ntol : float, default=1e-4\n Tolerance for the optimization. When the loss or score is not improving\n by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\n unless ``learning_rate`` is set to 'adaptive', convergence is\n considered to be reached and training stops.\n\nverbose : bool, default=False\n Whether to print progress messages to stdout.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous\n call to fit as initialization, otherwise, just erase the\n previous solution. 
See :term:`the Glossary `.\n\nmomentum : float, default=0.9\n Momentum for gradient descent update. Should be between 0 and 1. Only\n used when solver='sgd'.\n\nnesterovs_momentum : bool, default=True\n Whether to use Nesterov's momentum. Only used when solver='sgd' and\n momentum > 0.\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to true, it will automatically set\n aside 10% of training data as validation and terminate training when\n validation score is not improving by at least tol for\n ``n_iter_no_change`` consecutive epochs. The split is stratified,\n except in a multilabel setting.\n Only effective when solver='sgd' or 'adam'\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True\n\nbeta_1 : float, default=0.9\n Exponential decay rate for estimates of first moment vector in adam,\n should be in [0, 1). Only used when solver='adam'\n\nbeta_2 : float, default=0.999\n Exponential decay rate for estimates of second moment vector in adam,\n should be in [0, 1). Only used when solver='adam'\n\nepsilon : float, default=1e-8\n Value for numerical stability in adam. Only used when solver='adam'\n\nn_iter_no_change : int, default=10\n Maximum number of epochs to not meet ``tol`` improvement.\n Only effective when solver='sgd' or 'adam'\n\n .. versionadded:: 0.20\n\nmax_fun : int, default=15000\n Only used when solver='lbfgs'. Maximum number of loss function calls.\n The solver iterates until convergence (determined by 'tol'), number\n of iterations reaches max_iter, or this number of loss function calls.\n Note that number of loss function calls will be greater than or equal\n to the number of iterations for the `MLPClassifier`.\n\n .. 
versionadded:: 0.22\n\nAttributes\n----------\nclasses_ : ndarray or list of ndarray of shape (n_classes,)\n Class labels for each output.\n\nloss_ : float\n The current loss computed with the loss function.\n\nbest_loss_ : float\n The minimum loss reached by the solver throughout fitting.\n\nloss_curve_ : list of shape (`n_iter_`,)\n The ith element in the list represents the loss at the ith iteration.\n\nt_ : int\n The number of training samples seen by the solver during fitting.\n\ncoefs_ : list of shape (n_layers - 1,)\n The ith element in the list represents the weight matrix corresponding\n to layer i.\n\nintercepts_ : list of shape (n_layers - 1,)\n The ith element in the list represents the bias vector corresponding to\n layer i + 1.\n\nn_iter_ : int\n The number of iterations the solver has ran.\n\nn_layers_ : int\n Number of layers.\n\nn_outputs_ : int\n Number of outputs.\n\nout_activation_ : str\n Name of the output activation function.\n\nExamples\n--------\n>>> from sklearn.neural_network import MLPClassifier\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_classification(n_samples=100, random_state=1)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,\n... 
random_state=1)\n>>> clf = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)\n>>> clf.predict_proba(X_test[:1])\narray([[0.038..., 0.961...]])\n>>> clf.predict(X_test[:5, :])\narray([1, 0, 1, 0, 1])\n>>> clf.score(X_test, y_test)\n0.8...\n\nNotes\n-----\nMLPClassifier trains iteratively since at each time step\nthe partial derivatives of the loss function with respect to the model\nparameters are computed to update the parameters.\n\nIt can also have a regularization term added to the loss function\nthat shrinks model parameters to prevent overfitting.\n\nThis implementation works with data represented as dense numpy arrays or\nsparse scipy arrays of floating point values.\n\nReferences\n----------\nHinton, Geoffrey E.\n \"Connectionist learning procedures.\" Artificial intelligence 40.1\n (1989): 185-234.\n\nGlorot, Xavier, and Yoshua Bengio. \"Understanding the difficulty of\n training deep feedforward neural networks.\" International Conference\n on Artificial Intelligence and Statistics. 2010.\n\nHe, Kaiming, et al. \"Delving deep into rectifiers: Surpassing human-level\n performance on imagenet classification.\" arXiv preprint\n arXiv:1502.01852 (2015).\n\nKingma, Diederik, and Jimmy Ba. \"Adam: A method for stochastic\n optimization.\" arXiv preprint arXiv:1412.6980 (2014)." - }, - { - "name": "MLPRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "hidden_layer_sizes", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element represents the number of neurons in the ith hidden layer." - }, - { - "name": "activation", - "type": "Literal['identity', 'logistic', 'tanh', 'relu']", - "hasDefault": true, - "default": "'relu'", - "limitation": null, - "ignored": false, - "docstring": "Activation function for the hidden layer. 
- 'identity', no-op activation, useful to implement linear bottleneck, returns f(x) = x - 'logistic', the logistic sigmoid function, returns f(x) = 1 / (1 + exp(-x)). - 'tanh', the hyperbolic tan function, returns f(x) = tanh(x). - 'relu', the rectified linear unit function, returns f(x) = max(0, x)" - }, - { - "name": "solver", - "type": "Literal['lbfgs', 'sgd', 'adam']", - "hasDefault": true, - "default": "'adam'", - "limitation": null, - "ignored": false, - "docstring": "The solver for weight optimization. - 'lbfgs' is an optimizer in the family of quasi-Newton methods. - 'sgd' refers to stochastic gradient descent. - 'adam' refers to a stochastic gradient-based optimizer proposed by Kingma, Diederik, and Jimmy Ba Note: The default solver 'adam' works pretty well on relatively large datasets (with thousands of training samples or more) in terms of both training time and validation score. For small datasets, however, 'lbfgs' can converge faster and perform better." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L2 penalty (regularization term) parameter." - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Size of minibatches for stochastic optimizers. If the solver is 'lbfgs', the classifier will not use minibatch. When set to \"auto\", `batch_size=min(200, n_samples)`" - }, - { - "name": "learning_rate", - "type": "Literal['constant', 'invscaling', 'adaptive']", - "hasDefault": true, - "default": "'constant'", - "limitation": null, - "ignored": false, - "docstring": "Learning rate schedule for weight updates. - 'constant' is a constant learning rate given by 'learning_rate_init'. - 'invscaling' gradually decreases the learning rate ``learning_rate_`` at each time step 't' using an inverse scaling exponent of 'power_t'. 
effective_learning_rate = learning_rate_init / pow(t, power_t) - 'adaptive' keeps the learning rate constant to 'learning_rate_init' as long as training loss keeps decreasing. Each time two consecutive epochs fail to decrease training loss by at least tol, or fail to increase validation score by at least tol if 'early_stopping' is on, the current learning rate is divided by 5. Only used when solver='sgd'." - }, - { - "name": "learning_rate_init", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The initial learning rate used. It controls the step-size in updating the weights. Only used when solver='sgd' or 'adam'." - }, - { - "name": "power_t", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The exponent for inverse scaling learning rate. It is used in updating effective learning rate when the learning_rate is set to 'invscaling'. Only used when solver='sgd'." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations. The solver iterates until convergence (determined by 'tol') or this number of iterations. For stochastic solvers ('sgd', 'adam'), note that this determines the number of epochs (how many times each data point will be used), not the number of gradient steps." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle samples in each iteration. Only used when solver='sgd' or 'adam'." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for weights and bias initialization, train-test split if early stopping is used, and batch sampling when solver='sgd' or 'adam'. 
Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for the optimization. When the loss or score is not improving by at least ``tol`` for ``n_iter_no_change`` consecutive iterations, unless ``learning_rate`` is set to 'adaptive', convergence is considered to be reached and training stops." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to print progress messages to stdout." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `." - }, - { - "name": "momentum", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Momentum for gradient descent update. Should be between 0 and 1. Only used when solver='sgd'." - }, - { - "name": "nesterovs_momentum", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use Nesterov's momentum. Only used when solver='sgd' and momentum > 0." - }, - { - "name": "early_stopping", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use early stopping to terminate training when validation score is not improving. If set to true, it will automatically set aside 10% of training data as validation and terminate training when validation score is not improving by at least ``tol`` for ``n_iter_no_change`` consecutive epochs. 
Only effective when solver='sgd' or 'adam'" - }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True" - }, - { - "name": "beta_1", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Exponential decay rate for estimates of first moment vector in adam, should be in [0, 1). Only used when solver='adam'" - }, - { - "name": "beta_2", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Exponential decay rate for estimates of second moment vector in adam, should be in [0, 1). Only used when solver='adam'" - }, - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "1e-8", - "limitation": null, - "ignored": false, - "docstring": "Value for numerical stability in adam. Only used when solver='adam'" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of epochs to not meet ``tol`` improvement. Only effective when solver='sgd' or 'adam' .. versionadded:: 0.20" - }, - { - "name": "max_fun", - "type": "int", - "hasDefault": true, - "default": "15000", - "limitation": null, - "ignored": false, - "docstring": "Only used when solver='lbfgs'. Maximum number of function calls. The solver iterates until convergence (determined by 'tol'), number of iterations reaches max_iter, or this number of function calls. Note that number of function calls will be greater than or equal to the number of iterations for the MLPRegressor. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the multi-layer perceptron model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\ny : ndarray of shape (n_samples, n_outputs)\n The predicted values." - }, - { - "name": "_validate_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Multi-layer Perceptron regressor.\n\nThis model optimizes the squared-loss using LBFGS or stochastic gradient\ndescent.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nhidden_layer_sizes : tuple, length = n_layers - 2, default=(100,)\n The ith element represents the number of neurons in the ith\n hidden layer.\n\nactivation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'\n Activation function for the hidden layer.\n\n - 'identity', no-op activation, useful to implement linear bottleneck,\n returns f(x) = x\n\n - 'logistic', the logistic sigmoid function,\n returns f(x) = 1 / (1 + exp(-x)).\n\n - 'tanh', the hyperbolic tan function,\n returns f(x) = tanh(x).\n\n - 'relu', the rectified linear unit function,\n returns f(x) = max(0, x)\n\nsolver : {'lbfgs', 'sgd', 'adam'}, default='adam'\n The solver for weight optimization.\n\n - 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n - 'sgd' refers to stochastic gradient descent.\n\n - 'adam' refers to a stochastic gradient-based optimizer proposed by\n Kingma, Diederik, and Jimmy Ba\n\n Note: The default solver 'adam' works pretty well on relatively\n large datasets (with thousands 
of training samples or more) in terms of\n both training time and validation score.\n For small datasets, however, 'lbfgs' can converge faster and perform\n better.\n\nalpha : float, default=0.0001\n L2 penalty (regularization term) parameter.\n\nbatch_size : int, default='auto'\n Size of minibatches for stochastic optimizers.\n If the solver is 'lbfgs', the classifier will not use minibatch.\n When set to \"auto\", `batch_size=min(200, n_samples)`\n\nlearning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'\n Learning rate schedule for weight updates.\n\n - 'constant' is a constant learning rate given by\n 'learning_rate_init'.\n\n - 'invscaling' gradually decreases the learning rate ``learning_rate_``\n at each time step 't' using an inverse scaling exponent of 'power_t'.\n effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n - 'adaptive' keeps the learning rate constant to\n 'learning_rate_init' as long as training loss keeps decreasing.\n Each time two consecutive epochs fail to decrease training loss by at\n least tol, or fail to increase validation score by at least tol if\n 'early_stopping' is on, the current learning rate is divided by 5.\n\n Only used when solver='sgd'.\n\nlearning_rate_init : double, default=0.001\n The initial learning rate used. It controls the step-size\n in updating the weights. Only used when solver='sgd' or 'adam'.\n\npower_t : double, default=0.5\n The exponent for inverse scaling learning rate.\n It is used in updating effective learning rate when the learning_rate\n is set to 'invscaling'. Only used when solver='sgd'.\n\nmax_iter : int, default=200\n Maximum number of iterations. The solver iterates until convergence\n (determined by 'tol') or this number of iterations. 
For stochastic\n solvers ('sgd', 'adam'), note that this determines the number of epochs\n (how many times each data point will be used), not the number of\n gradient steps.\n\nshuffle : bool, default=True\n Whether to shuffle samples in each iteration. Only used when\n solver='sgd' or 'adam'.\n\nrandom_state : int, RandomState instance, default=None\n Determines random number generation for weights and bias\n initialization, train-test split if early stopping is used, and batch\n sampling when solver='sgd' or 'adam'.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\ntol : float, default=1e-4\n Tolerance for the optimization. When the loss or score is not improving\n by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\n unless ``learning_rate`` is set to 'adaptive', convergence is\n considered to be reached and training stops.\n\nverbose : bool, default=False\n Whether to print progress messages to stdout.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous\n call to fit as initialization, otherwise, just erase the\n previous solution. See :term:`the Glossary `.\n\nmomentum : float, default=0.9\n Momentum for gradient descent update. Should be between 0 and 1. Only\n used when solver='sgd'.\n\nnesterovs_momentum : bool, default=True\n Whether to use Nesterov's momentum. Only used when solver='sgd' and\n momentum > 0.\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to true, it will automatically set\n aside 10% of training data as validation and terminate training when\n validation score is not improving by at least ``tol`` for\n ``n_iter_no_change`` consecutive epochs.\n Only effective when solver='sgd' or 'adam'\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. 
Must be between 0 and 1.\n Only used if early_stopping is True\n\nbeta_1 : float, default=0.9\n Exponential decay rate for estimates of first moment vector in adam,\n should be in [0, 1). Only used when solver='adam'\n\nbeta_2 : float, default=0.999\n Exponential decay rate for estimates of second moment vector in adam,\n should be in [0, 1). Only used when solver='adam'\n\nepsilon : float, default=1e-8\n Value for numerical stability in adam. Only used when solver='adam'\n\nn_iter_no_change : int, default=10\n Maximum number of epochs to not meet ``tol`` improvement.\n Only effective when solver='sgd' or 'adam'\n\n .. versionadded:: 0.20\n\nmax_fun : int, default=15000\n Only used when solver='lbfgs'. Maximum number of function calls.\n The solver iterates until convergence (determined by 'tol'), number\n of iterations reaches max_iter, or this number of function calls.\n Note that number of function calls will be greater than or equal to\n the number of iterations for the MLPRegressor.\n\n .. 
versionadded:: 0.22\n\nAttributes\n----------\nloss_ : float\n The current loss computed with the loss function.\n\nbest_loss_ : float\n The minimum loss reached by the solver throughout fitting.\n\nloss_curve_ : list of shape (`n_iter_`,)\n The ith element in the list represents the loss at the ith iteration.\n\nt_ : int\n The number of training samples seen by the solver during fitting.\n\ncoefs_ : list of shape (n_layers - 1,)\n The ith element in the list represents the weight matrix corresponding\n to layer i.\n\nintercepts_ : list of shape (n_layers - 1,)\n The ith element in the list represents the bias vector corresponding to\n layer i + 1.\n\nn_iter_ : int\n The number of iterations the solver has ran.\n\nn_layers_ : int\n Number of layers.\n\nn_outputs_ : int\n Number of outputs.\n\nout_activation_ : str\n Name of the output activation function.\n\nloss_curve_ : list of shape (n_iters,)\n Loss value evaluated at the end of each training step.\n\nt_ : int\n Mathematically equals `n_iters * X.shape[0]`, it means\n `time_step` and it is used by optimizer's learning rate scheduler.\n\nExamples\n--------\n>>> from sklearn.neural_network import MLPRegressor\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_regression(n_samples=200, random_state=1)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n... 
random_state=1)\n>>> regr = MLPRegressor(random_state=1, max_iter=500).fit(X_train, y_train)\n>>> regr.predict(X_test[:2])\narray([-0.9..., -7.1...])\n>>> regr.score(X_test, y_test)\n0.4...\n\nNotes\n-----\nMLPRegressor trains iteratively since at each time step\nthe partial derivatives of the loss function with respect to the model\nparameters are computed to update the parameters.\n\nIt can also have a regularization term added to the loss function\nthat shrinks model parameters to prevent overfitting.\n\nThis implementation works with data represented as dense and sparse numpy\narrays of floating point values.\n\nReferences\n----------\nHinton, Geoffrey E.\n \"Connectionist learning procedures.\" Artificial intelligence 40.1\n (1989): 185-234.\n\nGlorot, Xavier, and Yoshua Bengio. \"Understanding the difficulty of\n training deep feedforward neural networks.\" International Conference\n on Artificial Intelligence and Statistics. 2010.\n\nHe, Kaiming, et al. \"Delving deep into rectifiers: Surpassing human-level\n performance on imagenet classification.\" arXiv preprint\n arXiv:1502.01852 (2015).\n\nKingma, Diederik, and Jimmy Ba. \"Adam: A method for stochastic\n optimization.\" arXiv preprint arXiv:1412.6980 (2014)." - } - ], - "functions": [ - { - "name": "_pack", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Pack the parameters into a single vector." 
- } - ] - }, - { - "name": "sklearn.neural_network._rbm", - "imports": [ - "import time", - "import numpy as np", - "import scipy.sparse as sp", - "from scipy.special import expit", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_array", - "from utils import check_random_state", - "from utils import gen_even_slices", - "from utils.extmath import safe_sparse_dot", - "from utils.extmath import log_logistic", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "BernoulliRBM", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "256", - "limitation": null, - "ignored": false, - "docstring": "Number of binary hidden units." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The learning rate for weight updates. It is *highly* recommended to tune this hyper-parameter. Reasonable values are in the 10**[0., -3.] range." - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of examples per minibatch." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations/sweeps over the training dataset to perform during training." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level. The default, zero, means silent mode." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for: - Gibbs sampling from visible and hidden layers. - Initializing components, sampling from layers during fit. - Corrupting the data when scoring samples. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to be transformed." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the hidden layer activation probabilities, P(h=1|v=X).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to be transformed.\n\nReturns\n-------\nh : ndarray of shape (n_samples, n_components)\n Latent representations of the data." - }, - { - "name": "_mean_hiddens", - "decorators": [], - "parameters": [ - { - "name": "v", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values of the visible layer." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the probabilities P(h=1|v).\n\nParameters\n----------\nv : ndarray of shape (n_samples, n_features)\n Values of the visible layer.\n\nReturns\n-------\nh : ndarray of shape (n_samples, n_components)\n Corresponding mean field values for the hidden layer." - }, - { - "name": "_sample_hiddens", - "decorators": [], - "parameters": [ - { - "name": "v", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values of the visible layer to sample from." 
- }, - { - "name": "rng", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Random number generator to use." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sample from the distribution P(h|v).\n\nParameters\n----------\nv : ndarray of shape (n_samples, n_features)\n Values of the visible layer to sample from.\n\nrng : RandomState instance\n Random number generator to use.\n\nReturns\n-------\nh : ndarray of shape (n_samples, n_components)\n Values of the hidden layer." - }, - { - "name": "_sample_visibles", - "decorators": [], - "parameters": [ - { - "name": "h", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values of the hidden layer to sample from." - }, - { - "name": "rng", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Random number generator to use." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sample from the distribution P(v|h).\n\nParameters\n----------\nh : ndarray of shape (n_samples, n_components)\n Values of the hidden layer to sample from.\n\nrng : RandomState instance\n Random number generator to use.\n\nReturns\n-------\nv : ndarray of shape (n_samples, n_features)\n Values of the visible layer." - }, - { - "name": "_free_energy", - "decorators": [], - "parameters": [ - { - "name": "v", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values of the visible layer." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the free energy F(v) = - log sum_h exp(-E(v,h)).\n\nParameters\n----------\nv : ndarray of shape (n_samples, n_features)\n Values of the visible layer.\n\nReturns\n-------\nfree_energy : ndarray of shape (n_samples,)\n The value of the free energy." 
- }, - { - "name": "gibbs", - "decorators": [], - "parameters": [ - { - "name": "v", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values of the visible layer to start from." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform one Gibbs sampling step.\n\nParameters\n----------\nv : ndarray of shape (n_samples, n_features)\n Values of the visible layer to start from.\n\nReturns\n-------\nv_new : ndarray of shape (n_samples, n_features)\n Values of the visible layer after one Gibbs step." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to the data X which should contain a partial\nsegment of the data.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Training data.\n\nReturns\n-------\nself : BernoulliRBM\n The fitted model." - }, - { - "name": "_fit", - "decorators": [], - "parameters": [ - { - "name": "v_pos", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to use for training." - }, - { - "name": "rng", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Random number generator to use for sampling." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inner fit for one mini-batch.\n\nAdjust the parameters to maximize the likelihood of v using\nStochastic Maximum Likelihood (SML).\n\nParameters\n----------\nv_pos : ndarray of shape (n_samples, n_features)\n The data to use for training.\n\nrng : RandomState instance\n Random number generator to use for sampling." 
- }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values of the visible layer. Must be all-boolean (not checked)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the pseudo-likelihood of X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Values of the visible layer. Must be all-boolean (not checked).\n\nReturns\n-------\npseudo_likelihood : ndarray of shape (n_samples,)\n Value of the pseudo-likelihood (proxy for likelihood).\n\nNotes\n-----\nThis method is not deterministic: it computes a quantity called the\nfree energy on X, then on a randomly corrupted version of X, and\nreturns the log of the logistic function of the difference." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to the data X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\nReturns\n-------\nself : BernoulliRBM\n The fitted model." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Bernoulli Restricted Boltzmann Machine (RBM).\n\nA Restricted Boltzmann Machine with binary visible units and\nbinary hidden units. 
Parameters are estimated using Stochastic Maximum\nLikelihood (SML), also known as Persistent Contrastive Divergence (PCD)\n[2].\n\nThe time complexity of this implementation is ``O(d ** 2)`` assuming\nd ~ n_features ~ n_components.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=256\n Number of binary hidden units.\n\nlearning_rate : float, default=0.1\n The learning rate for weight updates. It is *highly* recommended\n to tune this hyper-parameter. Reasonable values are in the\n 10**[0., -3.] range.\n\nbatch_size : int, default=10\n Number of examples per minibatch.\n\nn_iter : int, default=10\n Number of iterations/sweeps over the training dataset to perform\n during training.\n\nverbose : int, default=0\n The verbosity level. The default, zero, means silent mode.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for:\n\n - Gibbs sampling from visible and hidden layers.\n\n - Initializing components, sampling from layers during fit.\n\n - Corrupting the data when scoring samples.\n\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nintercept_hidden_ : array-like of shape (n_components,)\n Biases of the hidden units.\n\nintercept_visible_ : array-like of shape (n_features,)\n Biases of the visible units.\n\ncomponents_ : array-like of shape (n_components, n_features)\n Weight matrix, where n_features in the number of\n visible units and n_components is the number of hidden units.\n\nh_samples_ : array-like of shape (batch_size, n_components)\n Hidden Activation sampled from the model distribution,\n where batch_size in the number of examples per minibatch and\n n_components is the number of hidden units.\n\nExamples\n--------\n\n>>> import numpy as np\n>>> from sklearn.neural_network import BernoulliRBM\n>>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])\n>>> model = 
BernoulliRBM(n_components=2)\n>>> model.fit(X)\nBernoulliRBM(n_components=2)\n\nReferences\n----------\n\n[1] Hinton, G. E., Osindero, S. and Teh, Y. A fast learning algorithm for\n deep belief nets. Neural Computation 18, pp 1527-1554.\n https://www.cs.toronto.edu/~hinton/absps/fastnc.pdf\n\n[2] Tieleman, T. Training Restricted Boltzmann Machines using\n Approximations to the Likelihood Gradient. International Conference\n on Machine Learning (ICML) 2008" - } - ], - "functions": [] - }, - { - "name": "sklearn.neural_network._stochastic_optimizers", - "imports": [ - "import numpy as np" - ], - "classes": [ - { - "name": "BaseOptimizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "params", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The concatenated list containing coefs_ and intercepts_ in MLP model. Used for initializing velocities and updating params" - }, - { - "name": "learning_rate_init", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The initial learning rate used. It controls the step-size in updating the weights" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update_params", - "decorators": [], - "parameters": [ - { - "name": "grads", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Containing gradients with respect to coefs_ and intercepts_ in MLP model. So length should be aligned with params" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update parameters with given gradients\n\nParameters\n----------\ngrads : list, length = len(params)\n Containing gradients with respect to coefs_ and intercepts_ in MLP\n model. 
So length should be aligned with params" - }, - { - "name": "iteration_ends", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform update to learning rate and potentially other states at the\nend of an iteration" - }, - { - "name": "trigger_stopping", - "decorators": [], - "parameters": [ - { - "name": "msg", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Message passed in for verbose output" - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Print message to stdin if True" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decides whether it is time to stop training\n\nParameters\n----------\nmsg : str\n Message passed in for verbose output\n\nverbose : bool\n Print message to stdin if True\n\nReturns\n-------\nis_stopping : bool\n True if training needs to stop" - } - ], - "docstring": "Base (Stochastic) gradient descent optimizer\n\nParameters\n----------\nparams : list, length = len(coefs_) + len(intercepts_)\n The concatenated list containing coefs_ and intercepts_ in MLP model.\n Used for initializing velocities and updating params\n\nlearning_rate_init : float, default=0.1\n The initial learning rate used. It controls the step-size in updating\n the weights\n\nAttributes\n----------\nlearning_rate : float\n the current learning rate" - }, - { - "name": "SGDOptimizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "params", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The concatenated list containing coefs_ and intercepts_ in MLP model. 
Used for initializing velocities and updating params" - }, - { - "name": "learning_rate_init", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The initial learning rate used. It controls the step-size in updating the weights" - }, - { - "name": "lr_schedule", - "type": "Literal['constant', 'adaptive', 'invscaling']", - "hasDefault": true, - "default": "'constant'", - "limitation": null, - "ignored": false, - "docstring": "Learning rate schedule for weight updates. -'constant', is a constant learning rate given by 'learning_rate_init'. -'invscaling' gradually decreases the learning rate 'learning_rate_' at each time step 't' using an inverse scaling exponent of 'power_t'. learning_rate_ = learning_rate_init / pow(t, power_t) -'adaptive', keeps the learning rate constant to 'learning_rate_init' as long as the training keeps decreasing. Each time 2 consecutive epochs fail to decrease the training loss by tol, or fail to increase validation score by tol if 'early_stopping' is on, the current learning rate is divided by 5." - }, - { - "name": "momentum", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Value of momentum used, must be larger than or equal to 0" - }, - { - "name": "nesterov", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use nesterov's momentum or not. Use nesterov's if True" - }, - { - "name": "power_t", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Power of time step 't' in inverse scaling. See `lr_schedule` for more details." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "iteration_ends", - "decorators": [], - "parameters": [ - { - "name": "time_step", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "number of training samples trained on so far, used to update learning rate for 'invscaling'" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform updates to learning rate and potential other states at the\nend of an iteration\n\nParameters\n----------\ntime_step : int\n number of training samples trained on so far, used to update\n learning rate for 'invscaling'" - }, - { - "name": "trigger_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_updates", - "decorators": [], - "parameters": [ - { - "name": "grads", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Containing gradients with respect to coefs_ and intercepts_ in MLP model. So length should be aligned with params" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get the values used to update params with given gradients\n\nParameters\n----------\ngrads : list, length = len(coefs_) + len(intercepts_)\n Containing gradients with respect to coefs_ and intercepts_ in MLP\n model. So length should be aligned with params\n\nReturns\n-------\nupdates : list, length = len(grads)\n The values to add to params" - } - ], - "docstring": "Stochastic gradient descent optimizer with momentum\n\nParameters\n----------\nparams : list, length = len(coefs_) + len(intercepts_)\n The concatenated list containing coefs_ and intercepts_ in MLP model.\n Used for initializing velocities and updating params\n\nlearning_rate_init : float, default=0.1\n The initial learning rate used. 
It controls the step-size in updating\n the weights\n\nlr_schedule : {'constant', 'adaptive', 'invscaling'}, default='constant'\n Learning rate schedule for weight updates.\n\n -'constant', is a constant learning rate given by\n 'learning_rate_init'.\n\n -'invscaling' gradually decreases the learning rate 'learning_rate_' at\n each time step 't' using an inverse scaling exponent of 'power_t'.\n learning_rate_ = learning_rate_init / pow(t, power_t)\n\n -'adaptive', keeps the learning rate constant to\n 'learning_rate_init' as long as the training keeps decreasing.\n Each time 2 consecutive epochs fail to decrease the training loss by\n tol, or fail to increase validation score by tol if 'early_stopping'\n is on, the current learning rate is divided by 5.\n\nmomentum : float, default=0.9\n Value of momentum used, must be larger than or equal to 0\n\nnesterov : bool, default=True\n Whether to use nesterov's momentum or not. Use nesterov's if True\n\npower_t : float, default=0.5\n Power of time step 't' in inverse scaling. See `lr_schedule` for\n more details.\n\nAttributes\n----------\nlearning_rate : float\n the current learning rate\n\nvelocities : list, length = len(params)\n velocities that are used to update params" - }, - { - "name": "AdamOptimizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "params", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The concatenated list containing coefs_ and intercepts_ in MLP model. Used for initializing velocities and updating params" - }, - { - "name": "learning_rate_init", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The initial learning rate used. 
It controls the step-size in updating the weights" - }, - { - "name": "beta_1", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Exponential decay rate for estimates of first moment vector, should be in [0, 1)" - }, - { - "name": "beta_2", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Exponential decay rate for estimates of second moment vector, should be in [0, 1)" - }, - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "1e-8", - "limitation": null, - "ignored": false, - "docstring": "Value for numerical stability" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_updates", - "decorators": [], - "parameters": [ - { - "name": "grads", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Containing gradients with respect to coefs_ and intercepts_ in MLP model. So length should be aligned with params" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get the values used to update params with given gradients\n\nParameters\n----------\ngrads : list, length = len(coefs_) + len(intercepts_)\n Containing gradients with respect to coefs_ and intercepts_ in MLP\n model. So length should be aligned with params\n\nReturns\n-------\nupdates : list, length = len(grads)\n The values to add to params" - } - ], - "docstring": "Stochastic gradient descent optimizer with Adam\n\nNote: All default values are from the original Adam paper\n\nParameters\n----------\nparams : list, length = len(coefs_) + len(intercepts_)\n The concatenated list containing coefs_ and intercepts_ in MLP model.\n Used for initializing velocities and updating params\n\nlearning_rate_init : float, default=0.001\n The initial learning rate used. 
It controls the step-size in updating\n the weights\n\nbeta_1 : float, default=0.9\n Exponential decay rate for estimates of first moment vector, should be\n in [0, 1)\n\nbeta_2 : float, default=0.999\n Exponential decay rate for estimates of second moment vector, should be\n in [0, 1)\n\nepsilon : float, default=1e-8\n Value for numerical stability\n\nAttributes\n----------\nlearning_rate : float\n The current learning rate\n\nt : int\n Timestep\n\nms : list, length = len(params)\n First moment vectors\n\nvs : list, length = len(params)\n Second moment vectors\n\nReferences\n----------\nKingma, Diederik, and Jimmy Ba.\n\"Adam: A method for stochastic optimization.\"\narXiv preprint arXiv:1412.6980 (2014)." - } - ], - "functions": [] - }, - { - "name": "sklearn.neural_network", - "imports": [ - "from _rbm import BernoulliRBM", - "from _multilayer_perceptron import MLPClassifier", - "from _multilayer_perceptron import MLPRegressor" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.neural_network.tests.test_base", - "imports": [ - "import pytest", - "import numpy as np", - "from sklearn.neural_network._base import binary_log_loss", - "from sklearn.neural_network._base import log_loss" - ], - "classes": [], - "functions": [ - { - "name": "test_binary_log_loss_1_prob_finite", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_log_loss_1_prob_finite", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neural_network.tests.test_mlp", - "imports": [ - "import pytest", - "import sys", - "import warnings", - "import re", - "import numpy as np", - "from numpy.testing import assert_almost_equal", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_allclose", - "from sklearn.datasets import load_digits", - "from sklearn.datasets import load_iris", - 
"from sklearn.datasets import make_regression", - "from sklearn.datasets import make_multilabel_classification", - "from sklearn.exceptions import ConvergenceWarning", - "from io import StringIO", - "from sklearn.metrics import roc_auc_score", - "from sklearn.neural_network import MLPClassifier", - "from sklearn.neural_network import MLPRegressor", - "from sklearn.preprocessing import LabelBinarizer", - "from sklearn.preprocessing import MinMaxScaler", - "from sklearn.preprocessing import scale", - "from scipy.sparse import csr_matrix", - "from sklearn.utils._testing import ignore_warnings" - ], - "classes": [], - "functions": [ - { - "name": "test_alpha", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lbfgs_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lbfgs_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lbfgs_classification_maxfun", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lbfgs_regression_maxfun", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_rate_warmstart", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_multioutput_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_classes_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_unseen_classes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_params_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_proba_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_proba_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_proba_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_matrices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tolerance", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_verbose_sgd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_adaptive_learning_rate", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_full_iteration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_iter_no_change", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_iter_no_change_inf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping_stratified", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mlp_classifier_dtypes_casting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mlp_regressor_dtypes_casting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mlp_param_dtypes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neural_network.tests.test_rbm", - "imports": [ - "import sys", - "import re", - "import pytest", - "import numpy as np", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import 
csr_matrix", - "from scipy.sparse import lil_matrix", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.datasets import load_digits", - "from io import StringIO", - "from sklearn.neural_network import BernoulliRBM", - "from sklearn.utils.validation import assert_all_finite" - ], - "classes": [], - "functions": [ - { - "name": "test_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_small_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_small_sparse_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_hiddens", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_gibbs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_gibbs_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gibbs_smoke", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rbm_verbose", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_and_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transformer_dtypes_casting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_convergence_dtype_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neural_network.tests.test_stochastic_optimizers", - "imports": [ - "import numpy as np", - "from sklearn.neural_network._stochastic_optimizers import BaseOptimizer", - "from sklearn.neural_network._stochastic_optimizers import SGDOptimizer", - "from sklearn.neural_network._stochastic_optimizers import AdamOptimizer", - "from sklearn.utils._testing import assert_array_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_base_optimizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_optimizer_no_momentum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_optimizer_momentum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_optimizer_trigger_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_optimizer_nesterovs_momentum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_adam_optimizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { 
- "name": "sklearn.neural_network.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.preprocessing.setup", - "imports": [ - "import os", - "import numpy", - "from numpy.distutils.misc_util import Configuration" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.preprocessing._data", - "imports": [ - "from itertools import chain", - "from itertools import combinations", - "import warnings", - "from itertools import combinations_with_replacement as combinations_w_r", - "import numpy as np", - "from scipy import sparse", - "from scipy import stats", - "from scipy import optimize", - "from scipy.special import boxcox", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_array", - "from utils.deprecation import deprecated", - "from utils.extmath import row_norms", - "from utils.extmath import _incremental_mean_and_var", - "from utils.extmath import _incremental_weighted_mean_and_var", - "from utils.sparsefuncs_fast import inplace_csr_row_normalize_l1", - "from utils.sparsefuncs_fast import inplace_csr_row_normalize_l2", - "from utils.sparsefuncs import inplace_column_scale", - "from utils.sparsefuncs import mean_variance_axis", - "from utils.sparsefuncs import incr_mean_variance_axis", - "from utils.sparsefuncs import min_max_axis", - "from utils.validation import check_is_fitted", - "from utils.validation import check_random_state", - "from utils.validation import _check_sample_weight", - "from utils.validation import FLOAT_DTYPES", - "from utils.validation import _deprecate_positional_args", - "from _csr_polynomial_expansion import _csr_polynomial_expansion", - "from _encoders import OneHotEncoder" - ], - "classes": [ - { - "name": "MinMaxScaler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": 
"__init__", - "decorators": [], - "parameters": [ - { - "name": "feature_range", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Desired range of transformed data." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array)." - }, - { - "name": "clip: bool", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Set to True to clip transformed values of held-out data to provided `feature range`. .. versionadded:: 0.24" - }, - { - "name": "default=False", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Set to True to clip transformed values of held-out data to provided `feature range`. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_reset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reset internal data-dependent state of the scaler, if necessary.\n\n__init__ parameters are not touched." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to compute the per-feature minimum and maximum used for later scaling along the features axis." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the minimum and maximum to be used for later scaling.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data used to compute the per-feature minimum and maximum\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted scaler." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to compute the mean and standard deviation used for later scaling along the features axis." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Online computation of min and max on X for later scaling.\n\nAll of X is processed as a single batch. This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted scaler." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data that will be transformed." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Scale features of X according to feature_range.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data that will be transformed.\n\nReturns\n-------\nXt : ndarray of shape (n_samples, n_features)\n Transformed data." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data that will be transformed. It cannot be sparse." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Undo the scaling of X according to feature_range.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data that will be transformed. It cannot be sparse.\n\nReturns\n-------\nXt : ndarray of shape (n_samples, n_features)\n Transformed data." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, e.g. 
between\nzero and one.\n\nThe transformation is given by::\n\n X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n X_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfeature_range : tuple (min, max), default=(0, 1)\n Desired range of transformed data.\n\ncopy : bool, default=True\n Set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array).\n\nclip: bool, default=False\n Set to True to clip transformed values of held-out data to\n provided `feature range`.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nmin_ : ndarray of shape (n_features,)\n Per feature adjustment for minimum. Equivalent to\n ``min - X.min(axis=0) * self.scale_``\n\nscale_ : ndarray of shape (n_features,)\n Per feature relative scaling of the data. Equivalent to\n ``(max - min) / (X.max(axis=0) - X.min(axis=0))``\n\n .. versionadded:: 0.17\n *scale_* attribute.\n\ndata_min_ : ndarray of shape (n_features,)\n Per feature minimum seen in the data\n\n .. versionadded:: 0.17\n *data_min_*\n\ndata_max_ : ndarray of shape (n_features,)\n Per feature maximum seen in the data\n\n .. versionadded:: 0.17\n *data_max_*\n\ndata_range_ : ndarray of shape (n_features,)\n Per feature range ``(data_max_ - data_min_)`` seen in the data\n\n .. versionadded:: 0.17\n *data_range_*\n\nn_samples_seen_ : int\n The number of samples processed by the estimator.\n It will be reset on new calls to fit, but increments across\n ``partial_fit`` calls.\n\nExamples\n--------\n>>> from sklearn.preprocessing import MinMaxScaler\n>>> data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]\n>>> scaler = MinMaxScaler()\n>>> print(scaler.fit(data))\nMinMaxScaler()\n>>> print(scaler.data_max_)\n[ 1. 18.]\n>>> print(scaler.transform(data))\n[[0. 0. ]\n [0.25 0.25]\n [0.5 0.5 ]\n [1. 1. 
]]\n>>> print(scaler.transform([[2, 2]]))\n[[1.5 0. ]]\n\nSee Also\n--------\nminmax_scale : Equivalent function without the estimator API.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`." - }, - { - "name": "StandardScaler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, try to avoid a copy and do inplace scaling instead. This is not guaranteed to always work inplace; e.g. if the data is not a NumPy array or scipy.sparse CSR matrix, a copy may still be returned." - }, - { - "name": "with_mean", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, center the data before scaling. This does not work (and will raise an exception) when attempted on sparse matrices, because centering them entails building a dense matrix which in common use cases is likely to be too large to fit in memory." - }, - { - "name": "with_std", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, scale the data to unit variance (or equivalently, unit standard deviation)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_reset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reset internal data-dependent state of the scaler, if necessary.\n\n__init__ parameters are not touched." 
- }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to compute the mean and standard deviation used for later scaling along the features axis." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample. .. versionadded:: 0.24 parameter *sample_weight* support to StandardScaler." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the mean and std to be used for later scaling.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample.\n\n .. versionadded:: 0.24\n parameter *sample_weight* support to StandardScaler.\n\nReturns\n-------\nself : object\n Fitted scaler." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to compute the mean and standard deviation used for later scaling along the features axis." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample. 
.. versionadded:: 0.24 parameter *sample_weight* support to StandardScaler." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Online computation of mean and std on X for later scaling.\n\nAll of X is processed as a single batch. This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.\n\nThe algorithm for incremental mean and std is given in Equation 1.5a,b\nin Chan, Tony F., Gene H. Golub, and Randall J. LeVeque. \"Algorithms\nfor computing the sample variance: Analysis and recommendations.\"\nThe American Statistician 37.3 (1983): 242-247:\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample.\n\n .. versionadded:: 0.24\n parameter *sample_weight* support to StandardScaler.\n\nReturns\n-------\nself : object\n Fitted scaler." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the features axis." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Copy the input X or not." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform standardization by centering and scaling\n\nParameters\n----------\nX : {array-like, sparse matrix of shape (n_samples, n_features)\n The data used to scale along the features axis.\ncopy : bool, default=None\n Copy the input X or not.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array." 
- }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the features axis." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Copy the input X or not." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Scale back the data to the original representation\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis.\ncopy : bool, default=None\n Copy the input X or not.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Standardize features by removing the mean and scaling to unit variance\n\nThe standard score of a sample `x` is calculated as:\n\n z = (x - u) / s\n\nwhere `u` is the mean of the training samples or zero if `with_mean=False`,\nand `s` is the standard deviation of the training samples or one if\n`with_std=False`.\n\nCentering and scaling happen independently on each feature by computing\nthe relevant statistics on the samples in the training set. Mean and\nstandard deviation are then stored to be used on later data using\n:meth:`transform`.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators: they might behave badly if the\nindividual features do not more or less look like standard normally\ndistributed data (e.g. 
Gaussian with 0 mean and unit variance).\n\nFor instance many elements used in the objective function of\na learning algorithm (such as the RBF kernel of Support Vector\nMachines or the L1 and L2 regularizers of linear models) assume that\nall features are centered around 0 and have variance in the same\norder. If a feature has a variance that is orders of magnitude larger\nthat others, it might dominate the objective function and make the\nestimator unable to learn from other features correctly as expected.\n\nThis scaler can also be applied to sparse CSR or CSC matrices by passing\n`with_mean=False` to avoid breaking the sparsity structure of the data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncopy : bool, default=True\n If False, try to avoid a copy and do inplace scaling instead.\n This is not guaranteed to always work inplace; e.g. if the data is\n not a NumPy array or scipy.sparse CSR matrix, a copy may still be\n returned.\n\nwith_mean : bool, default=True\n If True, center the data before scaling.\n This does not work (and will raise an exception) when attempted on\n sparse matrices, because centering them entails building a dense\n matrix which in common use cases is likely to be too large to fit in\n memory.\n\nwith_std : bool, default=True\n If True, scale the data to unit variance (or equivalently,\n unit standard deviation).\n\nAttributes\n----------\nscale_ : ndarray of shape (n_features,) or None\n Per feature relative scaling of the data to achieve zero mean and unit\n variance. Generally this is calculated using `np.sqrt(var_)`. If a\n variance is zero, we can't achieve unit variance, and the data is left\n as-is, giving a scaling factor of 1. `scale_` is equal to `None`\n when `with_std=False`.\n\n .. 
versionadded:: 0.17\n *scale_*\n\nmean_ : ndarray of shape (n_features,) or None\n The mean value for each feature in the training set.\n Equal to ``None`` when ``with_mean=False``.\n\nvar_ : ndarray of shape (n_features,) or None\n The variance for each feature in the training set. Used to compute\n `scale_`. Equal to ``None`` when ``with_std=False``.\n\nn_samples_seen_ : int or ndarray of shape (n_features,)\n The number of samples processed by the estimator for each feature.\n If there are no missing samples, the ``n_samples_seen`` will be an\n integer, otherwise it will be an array of dtype int. If\n `sample_weights` are used it will be a float (if no missing data)\n or an array of dtype float that sums the weights seen so far.\n Will be reset on new calls to fit, but increments across\n ``partial_fit`` calls.\n\nExamples\n--------\n>>> from sklearn.preprocessing import StandardScaler\n>>> data = [[0, 0], [0, 0], [1, 1], [1, 1]]\n>>> scaler = StandardScaler()\n>>> print(scaler.fit(data))\nStandardScaler()\n>>> print(scaler.mean_)\n[0.5 0.5]\n>>> print(scaler.transform(data))\n[[-1. -1.]\n [-1. -1.]\n [ 1. 1.]\n [ 1. 1.]]\n>>> print(scaler.transform([[2, 2]]))\n[[3. 3.]]\n\nSee Also\n--------\nscale : Equivalent function without the estimator API.\n\n:class:`~sklearn.decomposition.PCA` : Further removes the linear\n correlation across features with 'whiten=True'.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nWe use a biased estimator for the standard deviation, equivalent to\n`numpy.std(x, ddof=0)`. Note that the choice of `ddof` is unlikely to\naffect model performance.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`." 
- }, - { - "name": "MaxAbsScaler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace scaling and avoid a copy (if the input is already a numpy array)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_reset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reset internal data-dependent state of the scaler, if necessary.\n\n__init__ parameters are not touched." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to compute the per-feature minimum and maximum used for later scaling along the features axis." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the maximum absolute value to be used for later scaling.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the per-feature minimum and maximum\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted scaler." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to compute the mean and standard deviation used for later scaling along the features axis." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Online computation of max absolute value of X for later scaling.\n\nAll of X is processed as a single batch. This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted scaler." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data that should be scaled." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Scale the data\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data that should be scaled.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data that should be transformed back." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Scale back the data to the original representation\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data that should be transformed back.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Scale each feature by its maximum absolute value.\n\nThis estimator scales and translates each feature individually such\nthat the maximal absolute value of each feature in the\ntraining set will be 1.0. It does not shift/center the data, and\nthus does not destroy any sparsity.\n\nThis scaler can also be applied to sparse CSR or CSC matrices.\n\n.. versionadded:: 0.17\n\nParameters\n----------\ncopy : bool, default=True\n Set to False to perform inplace scaling and avoid a copy (if the input\n is already a numpy array).\n\nAttributes\n----------\nscale_ : ndarray of shape (n_features,)\n Per feature relative scaling of the data.\n\n .. versionadded:: 0.17\n *scale_* attribute.\n\nmax_abs_ : ndarray of shape (n_features,)\n Per feature maximum absolute value.\n\nn_samples_seen_ : int\n The number of samples processed by the estimator. Will be reset on\n new calls to fit, but increments across ``partial_fit`` calls.\n\nExamples\n--------\n>>> from sklearn.preprocessing import MaxAbsScaler\n>>> X = [[ 1., -1., 2.],\n... [ 2., 0., 0.],\n... [ 0., 1., -1.]]\n>>> transformer = MaxAbsScaler().fit(X)\n>>> transformer\nMaxAbsScaler()\n>>> transformer.transform(X)\narray([[ 0.5, -1. , 1. ],\n [ 1. , 0. , 0. ],\n [ 0. , 1. , -0.5]])\n\nSee Also\n--------\nmaxabs_scale : Equivalent function without the estimator API.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`." 
- }, - { - "name": "RobustScaler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "with_centering", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, center the data before scaling. This will cause ``transform`` to raise an exception when attempted on sparse matrices, because centering them entails building a dense matrix which in common use cases is likely to be too large to fit in memory." - }, - { - "name": "with_scaling", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, scale the data to interquartile range." - }, - { - "name": "quantile_range", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Quantile range used to calculate ``scale_``. .. versionadded:: 0.18" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, try to avoid a copy and do inplace scaling instead. This is not guaranteed to always work inplace; e.g. if the data is not a NumPy array or scipy.sparse CSR matrix, a copy may still be returned." - }, - { - "name": "unit_variance", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, scale data so that normally distributed features have a variance of 1. In general, if the difference between the x-values of ``q_max`` and ``q_min`` for a standard normal distribution is greater than 1, the dataset will be scaled down. If less than 1, the dataset will be scaled up. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to compute the median and quantiles used for later scaling along the features axis." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the median and quantiles to be used for scaling.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the median and quantiles\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted scaler." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the specified axis." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Center and scale the data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the specified axis.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The rescaled data to be transformed back." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Scale back the data to the original representation\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The rescaled data to be transformed back.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Scale features using statistics that are robust to outliers.\n\nThis Scaler removes the median and scales the data according to\nthe quantile range (defaults to IQR: Interquartile Range).\nThe IQR is the range between the 1st quartile (25th quantile)\nand the 3rd quartile (75th quantile).\n\nCentering and scaling happen independently on each feature by\ncomputing the relevant statistics on the samples in the training\nset. Median and interquartile range are then stored to be used on\nlater data using the ``transform`` method.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators. Typically this is done by removing the mean\nand scaling to unit variance. However, outliers can often influence the\nsample mean / variance in a negative way. In such cases, the median and\nthe interquartile range often give better results.\n\n.. 
versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nwith_centering : bool, default=True\n If True, center the data before scaling.\n This will cause ``transform`` to raise an exception when attempted on\n sparse matrices, because centering them entails building a dense\n matrix which in common use cases is likely to be too large to fit in\n memory.\n\nwith_scaling : bool, default=True\n If True, scale the data to interquartile range.\n\nquantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0, default=(25.0, 75.0), == (1st quantile, 3rd quantile), == IQR\n Quantile range used to calculate ``scale_``.\n\n .. versionadded:: 0.18\n\ncopy : bool, default=True\n If False, try to avoid a copy and do inplace scaling instead.\n This is not guaranteed to always work inplace; e.g. if the data is\n not a NumPy array or scipy.sparse CSR matrix, a copy may still be\n returned.\n\nunit_variance : bool, default=False\n If True, scale data so that normally distributed features have a\n variance of 1. In general, if the difference between the x-values of\n ``q_max`` and ``q_min`` for a standard normal distribution is greater\n than 1, the dataset will be scaled down. If less than 1, the dataset\n will be scaled up.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncenter_ : array of floats\n The median value for each feature in the training set.\n\nscale_ : array of floats\n The (scaled) interquartile range for each feature in the training set.\n\n .. versionadded:: 0.17\n *scale_* attribute.\n\nExamples\n--------\n>>> from sklearn.preprocessing import RobustScaler\n>>> X = [[ 1., -2., 2.],\n... [ -2., 1., 3.],\n... [ 4., 1., -2.]]\n>>> transformer = RobustScaler().fit(X)\n>>> transformer\nRobustScaler()\n>>> transformer.transform(X)\narray([[ 0. , -2. , 0. ],\n [-1. , 0. , 0.4],\n [ 1. , 0. 
, -1.6]])\n\nSee Also\n--------\nrobust_scale : Equivalent function without the estimator API.\n\n:class:`~sklearn.decomposition.PCA`\n Further removes the linear correlation across features with\n 'whiten=True'.\n\nNotes\n-----\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\nhttps://en.wikipedia.org/wiki/Median\nhttps://en.wikipedia.org/wiki/Interquartile_range" - }, - { - "name": "PolynomialFeatures", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The degree of the polynomial features." - }, - { - "name": "interaction_only", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, only interaction features are produced: features that are products of at most ``degree`` *distinct* input features (so not ``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.)." - }, - { - "name": "include_bias", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True (default), then include a bias column, the feature in which all polynomial powers are zero (i.e. a column of ones - acts as an intercept term in a linear model)." - }, - { - "name": "order", - "type": "Literal['C', 'F']", - "hasDefault": true, - "default": "'C'", - "limitation": null, - "ignored": false, - "docstring": "Order of output array in the dense case. 'F' order is faster to compute, but may slow down subsequent estimators. .. 
versionadded:: 0.21" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_combinations", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "powers_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_feature_names", - "decorators": [], - "parameters": [ - { - "name": "input_features", - "type": "List[str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "String names for input features if available. By default, \"x0\", \"x1\", ... \"xn_features\" is used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return feature names for output features\n\nParameters\n----------\ninput_features : list of str of shape (n_features,), default=None\n String names for input features if available. By default,\n \"x0\", \"x1\", ... \"xn_features\" is used.\n\nReturns\n-------\noutput_feature_names : list of str of shape (n_output_features,)" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute number of output features.\n\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer." 
- }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to transform, row by row. Prefer CSR over CSC for sparse input (for speed), but CSC is required if the degree is 4 or higher. If the degree is less than 4 and the input format is CSC, it will be converted to CSR, have its polynomial features generated, then converted back to CSC. If the degree is 2 or 3, the method described in \"Leveraging Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices Using K-Simplex Numbers\" by Andrew Nystrom and John Hughes is used, which is much faster than the method used on CSC input. For this reason, a CSC input will be converted to CSR, and the output will be converted back to CSC prior to being returned, hence the preference of CSR." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform data to polynomial features\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to transform, row by row.\n\n Prefer CSR over CSC for sparse input (for speed), but CSC is\n required if the degree is 4 or higher. If the degree is less than\n 4 and the input format is CSC, it will be converted to CSR, have\n its polynomial features generated, then converted back to CSC.\n\n If the degree is 2 or 3, the method described in \"Leveraging\n Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices\n Using K-Simplex Numbers\" by Andrew Nystrom and John Hughes is\n used, which is much faster than the method used on CSC input. 
For\n this reason, a CSC input will be converted to CSR, and the output\n will be converted back to CSC prior to being returned, hence the\n preference of CSR.\n\nReturns\n-------\nXP : {ndarray, sparse matrix} of shape (n_samples, NP)\n The matrix of features, where NP is the number of polynomial\n features generated from the combination of inputs. If a sparse\n matrix is provided, it will be converted into a sparse\n ``csr_matrix``." - } - ], - "docstring": "Generate polynomial and interaction features.\n\nGenerate a new feature matrix consisting of all polynomial combinations\nof the features with degree less than or equal to the specified degree.\nFor example, if an input sample is two dimensional and of the form\n[a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].\n\nParameters\n----------\ndegree : int, default=2\n The degree of the polynomial features.\n\ninteraction_only : bool, default=False\n If true, only interaction features are produced: features that are\n products of at most ``degree`` *distinct* input features (so not\n ``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.).\n\ninclude_bias : bool, default=True\n If True (default), then include a bias column, the feature in which\n all polynomial powers are zero (i.e. a column of ones - acts as an\n intercept term in a linear model).\n\norder : {'C', 'F'}, default='C'\n Order of output array in the dense case. 'F' order is faster to\n compute, but may slow down subsequent estimators.\n\n .. 
versionadded:: 0.21\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import PolynomialFeatures\n>>> X = np.arange(6).reshape(3, 2)\n>>> X\narray([[0, 1],\n [2, 3],\n [4, 5]])\n>>> poly = PolynomialFeatures(2)\n>>> poly.fit_transform(X)\narray([[ 1., 0., 1., 0., 0., 1.],\n [ 1., 2., 3., 4., 6., 9.],\n [ 1., 4., 5., 16., 20., 25.]])\n>>> poly = PolynomialFeatures(interaction_only=True)\n>>> poly.fit_transform(X)\narray([[ 1., 0., 1., 0.],\n [ 1., 2., 3., 6.],\n [ 1., 4., 5., 20.]])\n\nAttributes\n----------\npowers_ : ndarray of shape (n_output_features, n_input_features)\n powers_[i, j] is the exponent of the jth input in the ith output.\n\nn_input_features_ : int\n The total number of input features.\n\nn_output_features_ : int\n The total number of polynomial output features. The number of output\n features is computed by iterating over all suitably sized combinations\n of input features.\n\nNotes\n-----\nBe aware that the number of features in the output array scales\npolynomially in the number of features of the input array, and\nexponentially in the degree. High degrees can cause overfitting.\n\nSee :ref:`examples/linear_model/plot_polynomial_interpolation.py\n`" - }, - { - "name": "Normalizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "norm", - "type": "Literal['l1', 'l2', 'max']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "The norm to use to normalize each non zero sample. If norm='max' is used, values will be rescaled by the maximum of the absolute values." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array or a scipy.sparse CSR matrix)." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to estimate the normalization parameters." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Do nothing and return the estimator unchanged\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to estimate the normalization parameters.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to normalize, row by row. scipy.sparse matrices should be in CSR format to avoid an un-necessary copy." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Copy the input X or not." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Scale each non zero row of X to unit norm\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to normalize, row by row. scipy.sparse matrices should be\n in CSR format to avoid an un-necessary copy.\n\ncopy : bool, default=None\n Copy the input X or not.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Normalize samples individually to unit norm.\n\nEach sample (i.e. each row of the data matrix) with at least one\nnon zero component is rescaled independently of other samples so\nthat its norm (l1, l2 or inf) equals one.\n\nThis transformer is able to work both with dense numpy arrays and\nscipy.sparse matrix (use CSR format if you want to avoid the burden of\na copy / conversion).\n\nScaling inputs to unit norms is a common operation for text\nclassification or clustering for instance. For instance the dot\nproduct of two l2-normalized TF-IDF vectors is the cosine similarity\nof the vectors and is the base similarity metric for the Vector\nSpace Model commonly used by the Information Retrieval community.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nnorm : {'l1', 'l2', 'max'}, default='l2'\n The norm to use to normalize each non zero sample. If norm='max'\n is used, values will be rescaled by the maximum of the absolute\n values.\n\ncopy : bool, default=True\n set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array or a scipy.sparse\n CSR matrix).\n\nExamples\n--------\n>>> from sklearn.preprocessing import Normalizer\n>>> X = [[4, 1, 2, 2],\n... [1, 3, 9, 3],\n... 
[5, 7, 5, 1]]\n>>> transformer = Normalizer().fit(X) # fit does nothing.\n>>> transformer\nNormalizer()\n>>> transformer.transform(X)\narray([[0.8, 0.2, 0.4, 0.4],\n [0.1, 0.3, 0.9, 0.3],\n [0.5, 0.7, 0.5, 0.1]])\n\nNotes\n-----\nThis estimator is stateless (besides constructor parameters), the\nfit method does nothing but is useful when used in a pipeline.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\nSee Also\n--------\nnormalize : Equivalent function without the estimator API." - }, - { - "name": "Binarizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "threshold", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "set to False to perform inplace binarization and avoid a copy (if the input is already a numpy array or a scipy.sparse CSR matrix)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to binarize, element by element. scipy.sparse matrices should be in CSR format to avoid an un-necessary copy." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Copy the input X or not." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Binarize each element of X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to binarize, element by element.\n scipy.sparse matrices should be in CSR format to avoid an\n un-necessary copy.\n\ncopy : bool\n Copy the input X or not.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Binarize data (set feature values to 0 or 1) according to a threshold.\n\nValues greater than the threshold map to 1, while values less than\nor equal to the threshold map to 0. 
With the default threshold of 0,\nonly positive values map to 1.\n\nBinarization is a common operation on text count data where the\nanalyst can decide to only consider the presence or absence of a\nfeature rather than a quantified number of occurrences for instance.\n\nIt can also be used as a pre-processing step for estimators that\nconsider boolean random variables (e.g. modelled using the Bernoulli\ndistribution in a Bayesian setting).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nthreshold : float, default=0.0\n Feature values below or equal to this are replaced by 0, above it by 1.\n Threshold may not be less than 0 for operations on sparse matrices.\n\ncopy : bool, default=True\n set to False to perform inplace binarization and avoid a copy (if\n the input is already a numpy array or a scipy.sparse CSR matrix).\n\nExamples\n--------\n>>> from sklearn.preprocessing import Binarizer\n>>> X = [[ 1., -1., 2.],\n... [ 2., 0., 0.],\n... [ 0., 1., -1.]]\n>>> transformer = Binarizer().fit(X) # fit does nothing.\n>>> transformer\nBinarizer()\n>>> transformer.transform(X)\narray([[1., 0., 1.],\n [1., 0., 0.],\n [0., 1., 0.]])\n\nNotes\n-----\nIf the input is a sparse matrix, only the non-zero values are subject\nto update by the Binarizer class.\n\nThis estimator is stateless (besides constructor parameters), the\nfit method does nothing but is useful when used in a pipeline.\n\nSee Also\n--------\nbinarize : Equivalent function without the estimator API." - }, - { - "name": "KernelCenterer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "K", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Kernel matrix." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit KernelCenterer\n\nParameters\n----------\nK : ndarray of shape (n_samples, n_samples)\n Kernel matrix.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "K", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Kernel matrix." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace computation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Center kernel matrix.\n\nParameters\n----------\nK : ndarray of shape (n_samples1, n_samples2)\n Kernel matrix.\n\ncopy : bool, default=True\n Set to False to perform inplace computation.\n\nReturns\n-------\nK_new : ndarray of shape (n_samples1, n_samples2)" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Center a kernel matrix.\n\nLet K(x, z) be a kernel defined by phi(x)^T phi(z), where phi is a\nfunction mapping x to a Hilbert space. 
KernelCenterer centers (i.e.,\nnormalize to have zero mean) the data without explicitly computing phi(x).\nIt is equivalent to centering phi(x) with\nsklearn.preprocessing.StandardScaler(with_std=False).\n\nRead more in the :ref:`User Guide `.\n\nAttributes\n----------\nK_fit_rows_ : array of shape (n_samples,)\n Average of each column of kernel matrix.\n\nK_fit_all_ : float\n Average of kernel matrix.\n\nExamples\n--------\n>>> from sklearn.preprocessing import KernelCenterer\n>>> from sklearn.metrics.pairwise import pairwise_kernels\n>>> X = [[ 1., -2., 2.],\n... [ -2., 1., 3.],\n... [ 4., 1., -2.]]\n>>> K = pairwise_kernels(X, metric='linear')\n>>> K\narray([[ 9., 2., -2.],\n [ 2., 14., -13.],\n [ -2., -13., 21.]])\n>>> transformer = KernelCenterer().fit(K)\n>>> transformer\nKernelCenterer()\n>>> transformer.transform(K)\narray([[ 5., 0., -5.],\n [ 0., 14., -14.],\n [ -5., -14., 19.]])" - }, - { - "name": "QuantileTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_quantiles", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Number of quantiles to be computed. It corresponds to the number of landmarks used to discretize the cumulative distribution function. If n_quantiles is larger than the number of samples, n_quantiles is set to the number of samples as a larger number of quantiles does not give a better approximation of the cumulative distribution function estimator." - }, - { - "name": "output_distribution", - "type": "Literal['uniform', 'normal']", - "hasDefault": true, - "default": "'uniform'", - "limitation": null, - "ignored": false, - "docstring": "Marginal distribution for the transformed data. The choices are 'uniform' (default) or 'normal'." 
- }, - { - "name": "ignore_implicit_zeros", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Only applies to sparse matrices. If True, the sparse entries of the matrix are discarded to compute the quantile statistics. If False, these entries are treated as zeros." - }, - { - "name": "subsample", - "type": "int", - "hasDefault": true, - "default": "1e5", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of samples used to estimate the quantiles for computational efficiency. Note that the subsampling procedure may differ for value-identical sparse and dense matrices." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for subsampling and smoothing noise. Please see ``subsample`` for more details. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace transformation and avoid a copy (if the input is already a numpy array)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_dense_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the features axis." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute percentiles for dense matrices.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The data used to scale along the features axis." 
- }, - { - "name": "_sparse_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the features axis. The sparse matrix needs to be nonnegative. If a sparse matrix is provided, it will be converted into a sparse ``csc_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute percentiles for sparse matrices.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n The data used to scale along the features axis. The sparse matrix\n needs to be nonnegative. If a sparse matrix is provided,\n it will be converted into a sparse ``csc_matrix``." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the features axis. If a sparse matrix is provided, it will be converted into a sparse ``csc_matrix``. Additionally, the sparse matrix needs to be nonnegative if `ignore_implicit_zeros` is False." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the quantiles used for transforming.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis. If a sparse\n matrix is provided, it will be converted into a sparse\n ``csc_matrix``. Additionally, the sparse matrix needs to be\n nonnegative if `ignore_implicit_zeros` is False.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer." 
- }, - { - "name": "_transform_col", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function to transform a single feature." - }, - { - "name": "_check_inputs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check inputs before fit and transform." - }, - { - "name": "_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the features axis." - }, - { - "name": "inverse", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If False, apply forward transform. If True, apply inverse transform." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Forward and inverse transform.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The data used to scale along the features axis.\n\ninverse : bool, default=False\n If False, apply forward transform. If True, apply\n inverse transform.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n Projected data." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the features axis. If a sparse matrix is provided, it will be converted into a sparse ``csc_matrix``. Additionally, the sparse matrix needs to be nonnegative if `ignore_implicit_zeros` is False." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Feature-wise transformation of the data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis. 
If a sparse\n matrix is provided, it will be converted into a sparse\n ``csc_matrix``. Additionally, the sparse matrix needs to be\n nonnegative if `ignore_implicit_zeros` is False.\n\nReturns\n-------\nXt : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The projected data." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the features axis. If a sparse matrix is provided, it will be converted into a sparse ``csc_matrix``. Additionally, the sparse matrix needs to be nonnegative if `ignore_implicit_zeros` is False." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Back-projection to the original space.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis. If a sparse\n matrix is provided, it will be converted into a sparse\n ``csc_matrix``. Additionally, the sparse matrix needs to be\n nonnegative if `ignore_implicit_zeros` is False.\n\nReturns\n-------\nXt : {ndarray, sparse matrix} of (n_samples, n_features)\n The projected data." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transform features using quantiles information.\n\nThis method transforms the features to follow a uniform or a normal\ndistribution. Therefore, for a given feature, this transformation tends\nto spread out the most frequent values. It also reduces the impact of\n(marginal) outliers: this is therefore a robust preprocessing scheme.\n\nThe transformation is applied on each feature independently. First an\nestimate of the cumulative distribution function of a feature is\nused to map the original values to a uniform distribution. 
The obtained\nvalues are then mapped to the desired output distribution using the\nassociated quantile function. Features values of new/unseen data that fall\nbelow or above the fitted range will be mapped to the bounds of the output\ndistribution. Note that this transform is non-linear. It may distort linear\ncorrelations between variables measured at the same scale but renders\nvariables measured at different scales more directly comparable.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.19\n\nParameters\n----------\nn_quantiles : int, default=1000 or n_samples\n Number of quantiles to be computed. It corresponds to the number\n of landmarks used to discretize the cumulative distribution function.\n If n_quantiles is larger than the number of samples, n_quantiles is set\n to the number of samples as a larger number of quantiles does not give\n a better approximation of the cumulative distribution function\n estimator.\n\noutput_distribution : {'uniform', 'normal'}, default='uniform'\n Marginal distribution for the transformed data. The choices are\n 'uniform' (default) or 'normal'.\n\nignore_implicit_zeros : bool, default=False\n Only applies to sparse matrices. If True, the sparse entries of the\n matrix are discarded to compute the quantile statistics. If False,\n these entries are treated as zeros.\n\nsubsample : int, default=1e5\n Maximum number of samples used to estimate the quantiles for\n computational efficiency. 
Note that the subsampling procedure may\n differ for value-identical sparse and dense matrices.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for subsampling and smoothing\n noise.\n Please see ``subsample`` for more details.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `\n\ncopy : bool, default=True\n Set to False to perform inplace transformation and avoid a copy (if the\n input is already a numpy array).\n\nAttributes\n----------\nn_quantiles_ : int\n The actual number of quantiles used to discretize the cumulative\n distribution function.\n\nquantiles_ : ndarray of shape (n_quantiles, n_features)\n The values corresponding the quantiles of reference.\n\nreferences_ : ndarray of shape (n_quantiles, )\n Quantiles of references.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import QuantileTransformer\n>>> rng = np.random.RandomState(0)\n>>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)\n>>> qt = QuantileTransformer(n_quantiles=10, random_state=0)\n>>> qt.fit_transform(X)\narray([...])\n\nSee Also\n--------\nquantile_transform : Equivalent function without the estimator API.\nPowerTransformer : Perform mapping to a normal distribution using a power\n transform.\nStandardScaler : Perform standardization that is faster, but less robust\n to outliers.\nRobustScaler : Perform robust standardization that removes the influence\n of outliers but does not put outliers and inliers on the same scale.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`." 
- }, - { - "name": "PowerTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "method", - "type": "Literal['yeo-johnson', 'box-cox']", - "hasDefault": true, - "default": "'yeo-johnson'", - "limitation": null, - "ignored": false, - "docstring": "The power transform method. Available methods are: - 'yeo-johnson' [1]_, works with positive and negative values - 'box-cox' [2]_, only works with strictly positive values" - }, - { - "name": "standardize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to True to apply zero-mean, unit-variance normalization to the transformed output." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace computation during transformation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to estimate the optimal transformation parameters." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Estimate the optimal parameter lambda for each feature.\n\nThe optimal lambda parameter for minimizing skewness is estimated on\neach feature independently using maximum likelihood.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data used to estimate the optimal transformation parameters.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer." 
- }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to be transformed using a power transformation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the power transform to each feature using the fitted lambdas.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data to be transformed using a power transformation.\n\nReturns\n-------\nX_trans : ndarray of shape (n_samples, n_features)\n The transformed data." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The transformed data." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the inverse power transformation using the fitted lambdas.\n\nThe inverse of the Box-Cox transformation is given by::\n\n if lambda_ == 0:\n X = exp(X_trans)\n else:\n X = (X_trans * lambda_ + 1) ** (1 / lambda_)\n\nThe inverse of the Yeo-Johnson transformation is given by::\n\n if X >= 0 and lambda_ == 0:\n X = exp(X_trans) - 1\n elif X >= 0 and lambda_ != 0:\n X = (X_trans * lambda_ + 1) ** (1 / lambda_) - 1\n elif X < 0 and lambda_ != 2:\n X = 1 - (-(2 - lambda_) * X_trans + 1) ** (1 / (2 - lambda_))\n elif X < 0 and lambda_ == 2:\n X = 1 - exp(-X_trans)\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The transformed data.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The original data." - }, - { - "name": "_box_cox_inverse_tranform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return inverse-transformed input x following Box-Cox inverse\ntransform with parameter lambda." - }, - { - "name": "_yeo_johnson_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return inverse-transformed input x following Yeo-Johnson inverse\ntransform with parameter lambda." - }, - { - "name": "_yeo_johnson_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return transformed input x following Yeo-Johnson transform with\nparameter lambda." - }, - { - "name": "_box_cox_optimize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find and return optimal lambda parameter of the Box-Cox transform by\nMLE, for observed data x.\n\nWe here use scipy builtins which uses the brent optimizer." 
- }, - { - "name": "_yeo_johnson_optimize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find and return optimal lambda parameter of the Yeo-Johnson\ntransform by MLE, for observed data x.\n\nLike for Box-Cox, MLE is done via the brent optimizer." - }, - { - "name": "_check_input", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "in_fit", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether or not `_check_input` is called from `fit` or other methods, e.g. `predict`, `transform`, etc." - }, - { - "name": "check_positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, check that all data is positive and non-zero (only if ``self.method=='box-cox'``)." - }, - { - "name": "check_shape", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, check that n_features matches the length of self.lambdas_" - }, - { - "name": "check_method", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, check that the transformation method is valid." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate the input before fit and transform.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nin_fit : bool\n Whether or not `_check_input` is called from `fit` or other\n methods, e.g. 
`predict`, `transform`, etc.\n\ncheck_positive : bool, default=False\n If True, check that all data is positive and non-zero (only if\n ``self.method=='box-cox'``).\n\ncheck_shape : bool, default=False\n If True, check that n_features matches the length of self.lambdas_\n\ncheck_method : bool, default=False\n If True, check that the transformation method is valid." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Apply a power transform featurewise to make data more Gaussian-like.\n\nPower transforms are a family of parametric, monotonic transformations\nthat are applied to make data more Gaussian-like. This is useful for\nmodeling issues related to heteroscedasticity (non-constant variance),\nor other situations where normality is desired.\n\nCurrently, PowerTransformer supports the Box-Cox transform and the\nYeo-Johnson transform. The optimal parameter for stabilizing variance and\nminimizing skewness is estimated through maximum likelihood.\n\nBox-Cox requires input data to be strictly positive, while Yeo-Johnson\nsupports both positive or negative data.\n\nBy default, zero-mean, unit-variance normalization is applied to the\ntransformed data.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nmethod : {'yeo-johnson', 'box-cox'}, default='yeo-johnson'\n The power transform method. 
Available methods are:\n\n - 'yeo-johnson' [1]_, works with positive and negative values\n - 'box-cox' [2]_, only works with strictly positive values\n\nstandardize : bool, default=True\n Set to True to apply zero-mean, unit-variance normalization to the\n transformed output.\n\ncopy : bool, default=True\n Set to False to perform inplace computation during transformation.\n\nAttributes\n----------\nlambdas_ : ndarray of float of shape (n_features,)\n The parameters of the power transformation for the selected features.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import PowerTransformer\n>>> pt = PowerTransformer()\n>>> data = [[1, 2], [3, 2], [4, 5]]\n>>> print(pt.fit(data))\nPowerTransformer()\n>>> print(pt.lambdas_)\n[ 1.386... -3.100...]\n>>> print(pt.transform(data))\n[[-1.316... -0.707...]\n [ 0.209... -0.707...]\n [ 1.106... 1.414...]]\n\nSee Also\n--------\npower_transform : Equivalent function without the estimator API.\n\nQuantileTransformer : Maps data to a standard normal distribution with\n the parameter `output_distribution='normal'`.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in ``fit``, and maintained\nin ``transform``.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\nReferences\n----------\n\n.. [1] I.K. Yeo and R.A. Johnson, \"A new family of power transformations to\n improve normality or symmetry.\" Biometrika, 87(4), pp.954-959,\n (2000).\n\n.. [2] G.E.P. Box and D.R. Cox, \"An Analysis of Transformations\", Journal\n of the Royal Statistical Society B, 26, 211-252 (1964)." - } - ], - "functions": [ - { - "name": "_handle_zeros_in_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Makes sure that whenever scale is zero, we handle it correctly.\n\nThis happens in most scalers when we have constant features." 
- }, - { - "name": "scale", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to center and scale." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "axis used to compute the means and standard deviations along. If 0, independently standardize each feature, otherwise (if 1) standardize each sample." - }, - { - "name": "with_mean", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, center the data before scaling." - }, - { - "name": "with_std", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, scale the data to unit variance (or equivalently, unit standard deviation)." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array or a scipy.sparse CSC matrix and if axis is 1)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Standardize a dataset along any axis.\n\nCenter to the mean and component wise scale to unit variance.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to center and scale.\n\naxis : int, default=0\n axis used to compute the means and standard deviations along. 
If 0,\n independently standardize each feature, otherwise (if 1) standardize\n each sample.\n\nwith_mean : bool, default=True\n If True, center the data before scaling.\n\nwith_std : bool, default=True\n If True, scale the data to unit variance (or equivalently,\n unit standard deviation).\n\ncopy : bool, default=True\n set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array or a scipy.sparse\n CSC matrix and if axis is 1).\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\nNotes\n-----\nThis implementation will refuse to center scipy.sparse matrices\nsince it would make them non-sparse and would potentially crash the\nprogram with memory exhaustion problems.\n\nInstead the caller is expected to either set explicitly\n`with_mean=False` (in that case, only variance scaling will be\nperformed on the features of the CSC matrix) or to call `X.toarray()`\nif he/she expects the materialized dense array to fit in memory.\n\nTo avoid memory copy the caller should pass a CSC matrix.\n\nNaNs are treated as missing values: disregarded to compute the statistics,\nand maintained during the data transformation.\n\nWe use a biased estimator for the standard deviation, equivalent to\n`numpy.std(x, ddof=0)`. Note that the choice of `ddof` is unlikely to\naffect model performance.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\n.. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.scale` unless you know\n what you are doing. A common mistake is to apply it to the entire data\n *before* splitting into training and test sets. 
This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.StandardScaler` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking: `pipe = make_pipeline(StandardScaler(), LogisticRegression())`.\n\nSee Also\n--------\nStandardScaler : Performs scaling to unit variance using the Transformer\n API (e.g. as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`)." - }, - { - "name": "minmax_scale", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - }, - { - "name": "feature_range", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Desired range of transformed data." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Axis used to scale along. If 0, independently scale each feature, otherwise (if 1) scale each sample." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace scaling and avoid a copy (if the input is already a numpy array)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, i.e. 
between\nzero and one.\n\nThe transformation is given by (when ``axis=0``)::\n\n X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n X_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThe transformation is calculated as (when ``axis=0``)::\n\n X_scaled = scale * X + min - X.min(axis=0) * scale\n where scale = (max - min) / (X.max(axis=0) - X.min(axis=0))\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n *minmax_scale* function interface\n to :class:`~sklearn.preprocessing.MinMaxScaler`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data.\n\nfeature_range : tuple (min, max), default=(0, 1)\n Desired range of transformed data.\n\naxis : int, default=0\n Axis used to scale along. If 0, independently scale each feature,\n otherwise (if 1) scale each sample.\n\ncopy : bool, default=True\n Set to False to perform inplace scaling and avoid a copy (if the input\n is already a numpy array).\n\nReturns\n-------\nX_tr : ndarray of shape (n_samples, n_features)\n The transformed data.\n\n.. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.minmax_scale` unless you know\n what you are doing. A common mistake is to apply it to the entire data\n *before* splitting into training and test sets. This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.MinMaxScaler` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking: `pipe = make_pipeline(MinMaxScaler(), LogisticRegression())`.\n\nSee Also\n--------\nMinMaxScaler : Performs scaling to a given range using the Transformer\n API (e.g. 
as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\n\nNotes\n-----\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`." - }, - { - "name": "maxabs_scale", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "axis used to scale along. If 0, independently scale each feature, otherwise (if 1) scale each sample." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace scaling and avoid a copy (if the input is already a numpy array)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Scale each feature to the [-1, 1] range without breaking the sparsity.\n\nThis estimator scales each feature individually such\nthat the maximal absolute value of each feature in the\ntraining set will be 1.0.\n\nThis scaler can also be applied to sparse CSR or CSC matrices.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data.\n\naxis : int, default=0\n axis used to scale along. If 0, independently scale each feature,\n otherwise (if 1) scale each sample.\n\ncopy : bool, default=True\n Set to False to perform inplace scaling and avoid a copy (if the input\n is already a numpy array).\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\n.. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.maxabs_scale` unless you know what\n you are doing. A common mistake is to apply it to the entire data\n *before* splitting into training and test sets. 
This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.MaxAbsScaler` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking: `pipe = make_pipeline(MaxAbsScaler(), LogisticRegression())`.\n\nSee Also\n--------\nMaxAbsScaler : Performs scaling to the [-1, 1] range using\n the Transformer API (e.g. as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\n\nNotes\n-----\nNaNs are treated as missing values: disregarded to compute the statistics,\nand maintained during the data transformation.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`." - }, - { - "name": "robust_scale", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to center and scale." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "axis used to compute the medians and IQR along. If 0, independently scale each feature, otherwise (if 1) scale each sample." - }, - { - "name": "with_centering", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, center the data before scaling." - }, - { - "name": "with_scaling", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, scale the data to unit variance (or equivalently, unit standard deviation)." 
- }, - { - "name": "quantile_range", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "default=(25.0, 75.0), == (1st quantile, 3rd quantile), == IQR Quantile range used to calculate ``scale_``. .. versionadded:: 0.18" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array or a scipy.sparse CSR matrix and if axis is 1)." - }, - { - "name": "unit_variance", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, scale data so that normally distributed features have a variance of 1. In general, if the difference between the x-values of ``q_max`` and ``q_min`` for a standard normal distribution is greater than 1, the dataset will be scaled down. If less than 1, the dataset will be scaled up. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Standardize a dataset along any axis\n\nCenter to the median and component wise scale\naccording to the interquartile range.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_sample, n_features)\n The data to center and scale.\n\naxis : int, default=0\n axis used to compute the medians and IQR along. If 0,\n independently scale each feature, otherwise (if 1) scale\n each sample.\n\nwith_centering : bool, default=True\n If True, center the data before scaling.\n\nwith_scaling : bool, default=True\n If True, scale the data to unit variance (or equivalently,\n unit standard deviation).\n\nquantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0\n default=(25.0, 75.0), == (1st quantile, 3rd quantile), == IQR\n Quantile range used to calculate ``scale_``.\n\n .. 
versionadded:: 0.18\n\ncopy : bool, default=True\n set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array or a scipy.sparse\n CSR matrix and if axis is 1).\n\nunit_variance : bool, default=False\n If True, scale data so that normally distributed features have a\n variance of 1. In general, if the difference between the x-values of\n ``q_max`` and ``q_min`` for a standard normal distribution is greater\n than 1, the dataset will be scaled down. If less than 1, the dataset\n will be scaled up.\n\n .. versionadded:: 0.24\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\nNotes\n-----\nThis implementation will refuse to center scipy.sparse matrices\nsince it would make them non-sparse and would potentially crash the\nprogram with memory exhaustion problems.\n\nInstead the caller is expected to either set explicitly\n`with_centering=False` (in that case, only variance scaling will be\nperformed on the features of the CSR matrix) or to call `X.toarray()`\nif he/she expects the materialized dense array to fit in memory.\n\nTo avoid memory copy the caller should pass a CSR matrix.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\n.. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.robust_scale` unless you know\n what you are doing. A common mistake is to apply it to the entire data\n *before* splitting into training and test sets. 
This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.RobustScaler` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking: `pipe = make_pipeline(RobustScaler(), LogisticRegression())`.\n\nSee Also\n--------\nRobustScaler : Performs centering and scaling using the Transformer API\n (e.g. as part of a preprocessing :class:`~sklearn.pipeline.Pipeline`)." - }, - { - "name": "normalize", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to normalize, element by element. scipy.sparse matrices should be in CSR format to avoid an un-necessary copy." - }, - { - "name": "norm", - "type": "Literal['l1', 'l2', 'max']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "The norm to use to normalize each non zero sample (or each non-zero feature if axis is 0)." - }, - { - "name": "axis", - "type": "Literal[0, 1]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "axis used to normalize the data along. If 1, independently normalize each sample, otherwise (if 0) normalize each feature." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array or a scipy.sparse CSR matrix and if axis is 1)." 
- }, - { - "name": "return_norm", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "whether to return the computed norms" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Scale input vectors individually to unit norm (vector length).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to normalize, element by element.\n scipy.sparse matrices should be in CSR format to avoid an\n un-necessary copy.\n\nnorm : {'l1', 'l2', 'max'}, default='l2'\n The norm to use to normalize each non zero sample (or each non-zero\n feature if axis is 0).\n\naxis : {0, 1}, default=1\n axis used to normalize the data along. If 1, independently normalize\n each sample, otherwise (if 0) normalize each feature.\n\ncopy : bool, default=True\n set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array or a scipy.sparse\n CSR matrix and if axis is 1).\n\nreturn_norm : bool, default=False\n whether to return the computed norms\n\nReturns\n-------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Normalized input X.\n\nnorms : ndarray of shape (n_samples, ) if axis=1 else (n_features, )\n An array of norms along given axis for X.\n When X is sparse, a NotImplementedError will be raised\n for norm 'l1' or 'l2'.\n\nSee Also\n--------\nNormalizer : Performs normalization using the Transformer API\n (e.g. as part of a preprocessing :class:`~sklearn.pipeline.Pipeline`).\n\nNotes\n-----\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`." 
- }, - { - "name": "binarize", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to binarize, element by element. scipy.sparse matrices should be in CSR or CSC format to avoid an un-necessary copy." - }, - { - "name": "threshold", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "set to False to perform inplace binarization and avoid a copy (if the input is already a numpy array or a scipy.sparse CSR / CSC matrix and if axis is 1)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Boolean thresholding of array-like or scipy.sparse matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to binarize, element by element.\n scipy.sparse matrices should be in CSR or CSC format to avoid an\n un-necessary copy.\n\nthreshold : float, default=0.0\n Feature values below or equal to this are replaced by 0, above it by 1.\n Threshold may not be less than 0 for operations on sparse matrices.\n\ncopy : bool, default=True\n set to False to perform inplace binarization and avoid a copy\n (if the input is already a numpy array or a scipy.sparse CSR / CSC\n matrix and if axis is 1).\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\nSee Also\n--------\nBinarizer : Performs binarization using the Transformer API\n (e.g. as part of a preprocessing :class:`~sklearn.pipeline.Pipeline`)." 
- }, - { - "name": "add_dummy_feature", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "value", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Value to use for the dummy feature." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Augment dataset with an additional dummy feature.\n\nThis is useful for fitting an intercept term with implementations which\ncannot otherwise fit it directly.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\nvalue : float\n Value to use for the dummy feature.\n\nReturns\n-------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features + 1)\n Same data with dummy feature added as first column.\n\nExamples\n--------\n>>> from sklearn.preprocessing import add_dummy_feature\n>>> add_dummy_feature([[0, 1], [1, 0]])\narray([[1., 0., 1.],\n [1., 1., 0.]])" - }, - { - "name": "quantile_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to transform." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Axis used to compute the means and standard deviations along. If 0, transform each feature, otherwise (if 1) transform each sample." - }, - { - "name": "n_quantiles", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Number of quantiles to be computed. It corresponds to the number of landmarks used to discretize the cumulative distribution function. 
If n_quantiles is larger than the number of samples, n_quantiles is set to the number of samples as a larger number of quantiles does not give a better approximation of the cumulative distribution function estimator." - }, - { - "name": "output_distribution", - "type": "Literal['uniform', 'normal']", - "hasDefault": true, - "default": "'uniform'", - "limitation": null, - "ignored": false, - "docstring": "Marginal distribution for the transformed data. The choices are 'uniform' (default) or 'normal'." - }, - { - "name": "ignore_implicit_zeros", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Only applies to sparse matrices. If True, the sparse entries of the matrix are discarded to compute the quantile statistics. If False, these entries are treated as zeros." - }, - { - "name": "subsample", - "type": "int", - "hasDefault": true, - "default": "1e5", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of samples used to estimate the quantiles for computational efficiency. Note that the subsampling procedure may differ for value-identical sparse and dense matrices." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for subsampling and smoothing noise. Please see ``subsample`` for more details. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace transformation and avoid a copy (if the input is already a numpy array). If True, a copy of `X` is transformed, leaving the original `X` unchanged ..versionchanged:: 0.23 The default value of `copy` changed from False to True in 0.23." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform features using quantiles information.\n\nThis method transforms the features to follow a uniform or a normal\ndistribution. Therefore, for a given feature, this transformation tends\nto spread out the most frequent values. It also reduces the impact of\n(marginal) outliers: this is therefore a robust preprocessing scheme.\n\nThe transformation is applied on each feature independently. First an\nestimate of the cumulative distribution function of a feature is\nused to map the original values to a uniform distribution. The obtained\nvalues are then mapped to the desired output distribution using the\nassociated quantile function. Features values of new/unseen data that fall\nbelow or above the fitted range will be mapped to the bounds of the output\ndistribution. Note that this transform is non-linear. It may distort linear\ncorrelations between variables measured at the same scale but renders\nvariables measured at different scales more directly comparable.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to transform.\n\naxis : int, default=0\n Axis used to compute the means and standard deviations along. If 0,\n transform each feature, otherwise (if 1) transform each sample.\n\nn_quantiles : int, default=1000 or n_samples\n Number of quantiles to be computed. It corresponds to the number\n of landmarks used to discretize the cumulative distribution function.\n If n_quantiles is larger than the number of samples, n_quantiles is set\n to the number of samples as a larger number of quantiles does not give\n a better approximation of the cumulative distribution function\n estimator.\n\noutput_distribution : {'uniform', 'normal'}, default='uniform'\n Marginal distribution for the transformed data. 
The choices are\n 'uniform' (default) or 'normal'.\n\nignore_implicit_zeros : bool, default=False\n Only applies to sparse matrices. If True, the sparse entries of the\n matrix are discarded to compute the quantile statistics. If False,\n these entries are treated as zeros.\n\nsubsample : int, default=1e5\n Maximum number of samples used to estimate the quantiles for\n computational efficiency. Note that the subsampling procedure may\n differ for value-identical sparse and dense matrices.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for subsampling and smoothing\n noise.\n Please see ``subsample`` for more details.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `\n\ncopy : bool, default=True\n Set to False to perform inplace transformation and avoid a copy (if the\n input is already a numpy array). If True, a copy of `X` is transformed,\n leaving the original `X` unchanged\n\n ..versionchanged:: 0.23\n The default value of `copy` changed from False to True in 0.23.\n\nReturns\n-------\nXt : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import quantile_transform\n>>> rng = np.random.RandomState(0)\n>>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)\n>>> quantile_transform(X, n_quantiles=10, random_state=0, copy=True)\narray([...])\n\nSee Also\n--------\nQuantileTransformer : Performs quantile-based scaling using the\n Transformer API (e.g. 
as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\npower_transform : Maps data to a normal distribution using a\n power transformation.\nscale : Performs standardization that is faster, but less robust\n to outliers.\nrobust_scale : Performs robust standardization that removes the influence\n of outliers but does not put outliers and inliers on the same scale.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\n.. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.quantile_transform` unless\n you know what you are doing. A common mistake is to apply it\n to the entire data *before* splitting into training and\n test sets. This will bias the model evaluation because\n information would have leaked from the test set to the\n training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.QuantileTransformer` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking:`pipe = make_pipeline(QuantileTransformer(),\n LogisticRegression())`.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`." - }, - { - "name": "power_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to be transformed using a power transformation." - }, - { - "name": "method", - "type": "Literal['yeo-johnson', 'box-cox']", - "hasDefault": true, - "default": "'yeo-johnson'", - "limitation": null, - "ignored": false, - "docstring": "The power transform method. Available methods are: - 'yeo-johnson' [1]_, works with positive and negative values - 'box-cox' [2]_, only works with strictly positive values .. versionchanged:: 0.23 The default value of the `method` parameter changed from 'box-cox' to 'yeo-johnson' in 0.23." 
- }, - { - "name": "standardize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to True to apply zero-mean, unit-variance normalization to the transformed output." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace computation during transformation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Power transforms are a family of parametric, monotonic transformations\nthat are applied to make data more Gaussian-like. This is useful for\nmodeling issues related to heteroscedasticity (non-constant variance),\nor other situations where normality is desired.\n\nCurrently, power_transform supports the Box-Cox transform and the\nYeo-Johnson transform. The optimal parameter for stabilizing variance and\nminimizing skewness is estimated through maximum likelihood.\n\nBox-Cox requires input data to be strictly positive, while Yeo-Johnson\nsupports both positive or negative data.\n\nBy default, zero-mean, unit-variance normalization is applied to the\ntransformed data.\n\nRead more in the :ref:`User Guide `.\n\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data to be transformed using a power transformation.\n\nmethod : {'yeo-johnson', 'box-cox'}, default='yeo-johnson'\n The power transform method. Available methods are:\n\n - 'yeo-johnson' [1]_, works with positive and negative values\n - 'box-cox' [2]_, only works with strictly positive values\n\n .. 
versionchanged:: 0.23\n The default value of the `method` parameter changed from\n 'box-cox' to 'yeo-johnson' in 0.23.\n\nstandardize : bool, default=True\n Set to True to apply zero-mean, unit-variance normalization to the\n transformed output.\n\ncopy : bool, default=True\n Set to False to perform inplace computation during transformation.\n\nReturns\n-------\nX_trans : ndarray of shape (n_samples, n_features)\n The transformed data.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import power_transform\n>>> data = [[1, 2], [3, 2], [4, 5]]\n>>> print(power_transform(data, method='box-cox'))\n[[-1.332... -0.707...]\n [ 0.256... -0.707...]\n [ 1.076... 1.414...]]\n\n.. warning:: Risk of data leak.\n Do not use :func:`~sklearn.preprocessing.power_transform` unless you\n know what you are doing. A common mistake is to apply it to the entire\n data *before* splitting into training and test sets. This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.PowerTransformer` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking, e.g.: `pipe = make_pipeline(PowerTransformer(),\n LogisticRegression())`.\n\nSee Also\n--------\nPowerTransformer : Equivalent transformation with the\n Transformer API (e.g. as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\n\nquantile_transform : Maps data to a standard normal distribution with\n the parameter `output_distribution='normal'`.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in ``fit``, and maintained\nin ``transform``.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\nReferences\n----------\n\n.. [1] I.K. Yeo and R.A. 
Johnson, \"A new family of power transformations to\n improve normality or symmetry.\" Biometrika, 87(4), pp.954-959,\n (2000).\n\n.. [2] G.E.P. Box and D.R. Cox, \"An Analysis of Transformations\", Journal\n of the Royal Statistical Society B, 26, 211-252 (1964)." - } - ] - }, - { - "name": "sklearn.preprocessing._discretization", - "imports": [ - "import numbers", - "import numpy as np", - "import warnings", - "from None import OneHotEncoder", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils.validation import check_array", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from cluster import KMeans" - ], - "classes": [ - { - "name": "KBinsDiscretizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_bins", - "type": "Union[ArrayLike, int]", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "The number of bins to produce. Raises ValueError if ``n_bins < 2``." - }, - { - "name": "encode", - "type": "Literal['onehot', 'onehot-dense', 'ordinal']", - "hasDefault": true, - "default": "'onehot'", - "limitation": null, - "ignored": false, - "docstring": "Method used to encode the transformed result. onehot Encode the transformed result with one-hot encoding and return a sparse matrix. Ignored features are always stacked to the right. onehot-dense Encode the transformed result with one-hot encoding and return a dense array. Ignored features are always stacked to the right. ordinal Return the bin identifier encoded as an integer value." - }, - { - "name": "strategy", - "type": "Literal['uniform', 'quantile', 'kmeans']", - "hasDefault": true, - "default": "'quantile'", - "limitation": null, - "ignored": false, - "docstring": "Strategy used to define the widths of the bins. uniform All bins in each feature have identical widths. 
quantile All bins in each feature have the same number of points. kmeans Values in each bin have the same nearest center of a 1D k-means cluster." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The desired data-type for the output. If None, output dtype is consistent with input dtype. Only np.float32 and np.float64 are supported. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to be discretized." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored. This parameter exists only for compatibility with :class:`~sklearn.pipeline.Pipeline`." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the estimator.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to be discretized.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nself" - }, - { - "name": "_validate_n_bins", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns n_bins_, the number of bins per feature.\n " - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to be discretized." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Discretize the data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to be discretized.\n\nReturns\n-------\nXt : {ndarray, sparse matrix}, dtype={np.float32, np.float64}\n Data in the binned space. Will be a sparse matrix if\n `self.encode='onehot'` and ndarray otherwise." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "Xt", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Transformed data in the binned space." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform discretized data back to original feature space.\n\nNote that this function does not regenerate the original data\ndue to discretization rounding.\n\nParameters\n----------\nXt : array-like of shape (n_samples, n_features)\n Transformed data in the binned space.\n\nReturns\n-------\nXinv : ndarray, dtype={np.float32, np.float64}\n Data in the original feature space." - } - ], - "docstring": "Bin continuous data into intervals.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nn_bins : int or array-like of shape (n_features,), default=5\n The number of bins to produce. Raises ValueError if ``n_bins < 2``.\n\nencode : {'onehot', 'onehot-dense', 'ordinal'}, default='onehot'\n Method used to encode the transformed result.\n\n onehot\n Encode the transformed result with one-hot encoding\n and return a sparse matrix. Ignored features are always\n stacked to the right.\n onehot-dense\n Encode the transformed result with one-hot encoding\n and return a dense array. 
Ignored features are always\n stacked to the right.\n ordinal\n Return the bin identifier encoded as an integer value.\n\nstrategy : {'uniform', 'quantile', 'kmeans'}, default='quantile'\n Strategy used to define the widths of the bins.\n\n uniform\n All bins in each feature have identical widths.\n quantile\n All bins in each feature have the same number of points.\n kmeans\n Values in each bin have the same nearest center of a 1D k-means\n cluster.\n\ndtype : {np.float32, np.float64}, default=None\n The desired data-type for the output. If None, output dtype is\n consistent with input dtype. Only np.float32 and np.float64 are\n supported.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nn_bins_ : ndarray of shape (n_features,), dtype=np.int_\n Number of bins per feature. Bins whose width are too small\n (i.e., <= 1e-8) are removed with a warning.\n\nbin_edges_ : ndarray of ndarray of shape (n_features,)\n The edges of each bin. Contain arrays of varying shapes ``(n_bins_, )``\n Ignored features will have empty arrays.\n\nSee Also\n--------\nBinarizer : Class used to bin values as ``0`` or\n ``1`` based on a parameter ``threshold``.\n\nNotes\n-----\nIn bin edges for feature ``i``, the first and last values are used only for\n``inverse_transform``. During transform, bin edges are extended to::\n\n np.concatenate([-np.inf, bin_edges_[i][1:-1], np.inf])\n\nYou can combine ``KBinsDiscretizer`` with\n:class:`~sklearn.compose.ColumnTransformer` if you only want to preprocess\npart of the features.\n\n``KBinsDiscretizer`` might produce constant features (e.g., when\n``encode = 'onehot'`` and certain bins do not contain any data).\nThese features can be removed with feature selection algorithms\n(e.g., :class:`~sklearn.feature_selection.VarianceThreshold`).\n\nExamples\n--------\n>>> X = [[-2, 1, -4, -1],\n... [-1, 2, -3, -0.5],\n... [ 0, 3, -2, 0.5],\n... 
[ 1, 4, -1, 2]]\n>>> est = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')\n>>> est.fit(X)\nKBinsDiscretizer(...)\n>>> Xt = est.transform(X)\n>>> Xt # doctest: +SKIP\narray([[ 0., 0., 0., 0.],\n [ 1., 1., 1., 0.],\n [ 2., 2., 2., 1.],\n [ 2., 2., 2., 2.]])\n\nSometimes it may be useful to convert the data back into the original\nfeature space. The ``inverse_transform`` function converts the binned\ndata into the original feature space. Each value will be equal to the mean\nof the two bin edges.\n\n>>> est.bin_edges_[0]\narray([-2., -1., 0., 1.])\n>>> est.inverse_transform(Xt)\narray([[-1.5, 1.5, -3.5, -0.5],\n [-0.5, 2.5, -2.5, -0.5],\n [ 0.5, 3.5, -1.5, 0.5],\n [ 0.5, 3.5, -1.5, 1.5]])" - } - ], - "functions": [] - }, - { - "name": "sklearn.preprocessing._encoders", - "imports": [ - "import numpy as np", - "from scipy import sparse", - "import numbers", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_array", - "from utils import is_scalar_nan", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils._encode import _encode", - "from utils._encode import _check_unknown", - "from utils._encode import _unique" - ], - "classes": [ - { - "name": "_BaseEncoder", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_check_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform custom check_array:\n- convert list of strings to object dtype\n- check for missing values for object dtype data (check_array does\n not do that)\n- return list of features (arrays): this list of features is\n constructed feature by feature to preserve the data types\n of pandas DataFrame columns, as otherwise information is lost\n and cannot be 
used, eg for the `categories_` attribute." - }, - { - "name": "_get_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for encoders that includes the code to categorize and\ntransform the input features." - }, - { - "name": "OneHotEncoder", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "categories", - "type": "Literal['auto']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Categories (unique values) per feature: - 'auto' : Determine categories automatically from the training data. - list : ``categories[i]`` holds the categories expected in the ith column. The passed categories should not mix strings and numeric values within a single feature, and should be sorted in case of numeric values. The used categories can be found in the ``categories_`` attribute. .. versionadded:: 0.20" - }, - { - "name": "drop", - "type": "Literal['first', 'if_binary']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies a methodology to use to drop one of the categories per feature. This is useful in situations where perfectly collinear features cause problems, such as when feeding the resulting data into a neural network or an unregularized regression. 
However, dropping one category breaks the symmetry of the original representation and can therefore induce a bias in downstream models, for instance for penalized linear classification or regression models. - None : retain all features (the default). - 'first' : drop the first category in each feature. If only one category is present, the feature will be dropped entirely. - 'if_binary' : drop the first category in each feature with two categories. Features with 1 or more than 2 categories are left intact. - array : ``drop[i]`` is the category in feature ``X[:, i]`` that should be dropped. .. versionchanged:: 0.23 Added option 'if_binary'." - }, - { - "name": "sparse", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Will return sparse matrix if set True else will return an array." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "float", - "limitation": null, - "ignored": false, - "docstring": "Desired dtype of output." - }, - { - "name": "handle_unknown", - "type": "Literal['error', 'ignore']", - "hasDefault": true, - "default": "'error'", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise an error or ignore if an unknown categorical feature is present during transform (default is to raise). When this parameter is set to 'ignore' and an unknown category is encountered during transform, the resulting one-hot encoded columns for this feature will be all zeros. In the inverse transform, an unknown category will be denoted as None." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_keywords", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_compute_drop_idx", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to determine the categories of each feature." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored. This parameter exists only for compatibility with :class:`~sklearn.pipeline.Pipeline`." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit OneHotEncoder to X.\n\nParameters\n----------\nX : array-like, shape [n_samples, n_features]\n The data to determine the categories of each feature.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nself" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to encode." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored. This parameter exists only for compatibility with :class:`~sklearn.pipeline.Pipeline`." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit OneHotEncoder to X, then transform X.\n\nEquivalent to fit(X).transform(X) but more convenient.\n\nParameters\n----------\nX : array-like, shape [n_samples, n_features]\n The data to encode.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nX_out : sparse matrix if sparse=True else a 2-d array\n Transformed input." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to encode." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform X using one-hot encoding.\n\nParameters\n----------\nX : array-like, shape [n_samples, n_features]\n The data to encode.\n\nReturns\n-------\nX_out : sparse matrix if sparse=True else a 2-d array\n Transformed input." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The transformed data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convert the data back to the original representation.\n\nIn case unknown categories are encountered (all zeros in the\none-hot encoding), ``None`` is used to represent this category.\n\nParameters\n----------\nX : array-like or sparse matrix, shape [n_samples, n_encoded_features]\n The transformed data.\n\nReturns\n-------\nX_tr : array-like, shape [n_samples, n_features]\n Inverse transformed array." 
- }, - { - "name": "get_feature_names", - "decorators": [], - "parameters": [ - { - "name": "input_features", - "type": "List[str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "String names for input features if available. By default, \"x0\", \"x1\", ... \"xn_features\" is used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return feature names for output features.\n\nParameters\n----------\ninput_features : list of str of shape (n_features,)\n String names for input features if available. By default,\n \"x0\", \"x1\", ... \"xn_features\" is used.\n\nReturns\n-------\noutput_feature_names : ndarray of shape (n_output_features,)\n Array of feature names." - } - ], - "docstring": "Encode categorical features as a one-hot numeric array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are encoded using a one-hot (aka 'one-of-K' or 'dummy')\nencoding scheme. This creates a binary column for each category and\nreturns a sparse matrix or dense array (depending on the ``sparse``\nparameter)\n\nBy default, the encoder derives the categories based on the unique values\nin each feature. Alternatively, you can also specify the `categories`\nmanually.\n\nThis encoding is needed for feeding categorical data to many scikit-learn\nestimators, notably linear models and SVMs with the standard kernels.\n\nNote: a one-hot encoding of y labels should use a LabelBinarizer\ninstead.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: 0.20\n\nParameters\n----------\ncategories : 'auto' or a list of array-like, default='auto'\n Categories (unique values) per feature:\n\n - 'auto' : Determine categories automatically from the training data.\n - list : ``categories[i]`` holds the categories expected in the ith\n column. 
The passed categories should not mix strings and numeric\n values within a single feature, and should be sorted in case of\n numeric values.\n\n The used categories can be found in the ``categories_`` attribute.\n\n .. versionadded:: 0.20\n\ndrop : {'first', 'if_binary'} or a array-like of shape (n_features,), default=None\n Specifies a methodology to use to drop one of the categories per\n feature. This is useful in situations where perfectly collinear\n features cause problems, such as when feeding the resulting data\n into a neural network or an unregularized regression.\n\n However, dropping one category breaks the symmetry of the original\n representation and can therefore induce a bias in downstream models,\n for instance for penalized linear classification or regression models.\n\n - None : retain all features (the default).\n - 'first' : drop the first category in each feature. If only one\n category is present, the feature will be dropped entirely.\n - 'if_binary' : drop the first category in each feature with two\n categories. Features with 1 or more than 2 categories are\n left intact.\n - array : ``drop[i]`` is the category in feature ``X[:, i]`` that\n should be dropped.\n\n .. versionchanged:: 0.23\n Added option 'if_binary'.\n\nsparse : bool, default=True\n Will return sparse matrix if set True else will return an array.\n\ndtype : number type, default=float\n Desired dtype of output.\n\nhandle_unknown : {'error', 'ignore'}, default='error'\n Whether to raise an error or ignore if an unknown categorical feature\n is present during transform (default is to raise). When this parameter\n is set to 'ignore' and an unknown category is encountered during\n transform, the resulting one-hot encoded columns for this feature\n will be all zeros. 
In the inverse transform, an unknown category\n will be denoted as None.\n\nAttributes\n----------\ncategories_ : list of arrays\n The categories of each feature determined during fitting\n (in order of the features in X and corresponding with the output\n of ``transform``). This includes the category specified in ``drop``\n (if any).\n\ndrop_idx_ : array of shape (n_features,)\n - ``drop_idx_[i]`` is\u00a0the index in ``categories_[i]`` of the category\n to be dropped for each feature.\n - ``drop_idx_[i] = None`` if no category is to be dropped from the\n feature with index ``i``, e.g. when `drop='if_binary'` and the\n feature isn't binary.\n - ``drop_idx_ = None`` if all the transformed features will be\n retained.\n\n .. versionchanged:: 0.23\n Added the possibility to contain `None` values.\n\nSee Also\n--------\nOrdinalEncoder : Performs an ordinal (integer)\n encoding of the categorical features.\nsklearn.feature_extraction.DictVectorizer : Performs a one-hot encoding of\n dictionary items (also handles string-valued features).\nsklearn.feature_extraction.FeatureHasher : Performs an approximate one-hot\n encoding of dictionary items or strings.\nLabelBinarizer : Binarizes labels in a one-vs-all\n fashion.\nMultiLabelBinarizer : Transforms between iterable of\n iterables and a multilabel format, e.g. 
a (samples x classes) binary\n matrix indicating the presence of a class label.\n\nExamples\n--------\nGiven a dataset with two features, we let the encoder find the unique\nvalues per feature and transform the data to a binary one-hot encoding.\n\n>>> from sklearn.preprocessing import OneHotEncoder\n\nOne can discard categories not seen during `fit`:\n\n>>> enc = OneHotEncoder(handle_unknown='ignore')\n>>> X = [['Male', 1], ['Female', 3], ['Female', 2]]\n>>> enc.fit(X)\nOneHotEncoder(handle_unknown='ignore')\n>>> enc.categories_\n[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n>>> enc.transform([['Female', 1], ['Male', 4]]).toarray()\narray([[1., 0., 1., 0., 0.],\n [0., 1., 0., 0., 0.]])\n>>> enc.inverse_transform([[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]])\narray([['Male', 1],\n [None, 2]], dtype=object)\n>>> enc.get_feature_names(['gender', 'group'])\narray(['gender_Female', 'gender_Male', 'group_1', 'group_2', 'group_3'],\n dtype=object)\n\nOne can always drop the first column for each feature:\n\n>>> drop_enc = OneHotEncoder(drop='first').fit(X)\n>>> drop_enc.categories_\n[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n>>> drop_enc.transform([['Female', 1], ['Male', 2]]).toarray()\narray([[0., 0., 0.],\n [1., 1., 0.]])\n\nOr drop a column for feature only having 2 categories:\n\n>>> drop_binary_enc = OneHotEncoder(drop='if_binary').fit(X)\n>>> drop_binary_enc.transform([['Female', 1], ['Male', 2]]).toarray()\narray([[0., 1., 0., 0.],\n [1., 0., 1., 0.]])" - }, - { - "name": "OrdinalEncoder", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "categories", - "type": "Literal['auto']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Categories (unique values) per feature: - 'auto' : Determine categories automatically from the training data. 
- list : ``categories[i]`` holds the categories expected in the ith column. The passed categories should not mix strings and numeric values, and should be sorted in case of numeric values. The used categories can be found in the ``categories_`` attribute." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Desired dtype of output." - }, - { - "name": "handle_unknown", - "type": "Literal['error', 'use_encoded_value']", - "hasDefault": true, - "default": "'error'", - "limitation": null, - "ignored": false, - "docstring": "When set to 'error' an error will be raised in case an unknown categorical feature is present during transform. When set to 'use_encoded_value', the encoded value of unknown categories will be set to the value given for the parameter `unknown_value`. In :meth:`inverse_transform`, an unknown category will be denoted as None. .. versionadded:: 0.24" - }, - { - "name": "unknown_value", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "When the parameter handle_unknown is set to 'use_encoded_value', this parameter is required and will set the encoded value of unknown categories. It has to be distinct from the values used to encode any of the categories in `fit`. If set to np.nan, the `dtype` parameter must be a float dtype. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to determine the categories of each feature." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored. 
This parameter exists only for compatibility with :class:`~sklearn.pipeline.Pipeline`." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the OrdinalEncoder to X.\n\nParameters\n----------\nX : array-like, shape [n_samples, n_features]\n The data to determine the categories of each feature.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nself" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to encode." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform X to ordinal codes.\n\nParameters\n----------\nX : array-like, shape [n_samples, n_features]\n The data to encode.\n\nReturns\n-------\nX_out : sparse matrix or a 2-d array\n Transformed input." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The transformed data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convert the data back to the original representation.\n\nParameters\n----------\nX : array-like or sparse matrix, shape [n_samples, n_encoded_features]\n The transformed data.\n\nReturns\n-------\nX_tr : array-like, shape [n_samples, n_features]\n Inverse transformed array." - } - ], - "docstring": "Encode categorical features as an integer array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are converted to ordinal integers. This results in\na single column of integers (0 to n_categories - 1) per feature.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.20\n\nParameters\n----------\ncategories : 'auto' or a list of array-like, default='auto'\n Categories (unique values) per feature:\n\n - 'auto' : Determine categories automatically from the training data.\n - list : ``categories[i]`` holds the categories expected in the ith\n column. The passed categories should not mix strings and numeric\n values, and should be sorted in case of numeric values.\n\n The used categories can be found in the ``categories_`` attribute.\n\ndtype : number type, default np.float64\n Desired dtype of output.\n\nhandle_unknown : {'error', 'use_encoded_value'}, default='error'\n When set to 'error' an error will be raised in case an unknown\n categorical feature is present during transform. When set to\n 'use_encoded_value', the encoded value of unknown categories will be\n set to the value given for the parameter `unknown_value`. In\n :meth:`inverse_transform`, an unknown category will be denoted as None.\n\n .. versionadded:: 0.24\n\nunknown_value : int or np.nan, default=None\n When the parameter handle_unknown is set to 'use_encoded_value', this\n parameter is required and will set the encoded value of unknown\n categories. It has to be distinct from the values used to encode any of\n the categories in `fit`. If set to np.nan, the `dtype` parameter must\n be a float dtype.\n\n .. 
versionadded:: 0.24\n\nAttributes\n----------\ncategories_ : list of arrays\n The categories of each feature determined during ``fit`` (in order of\n the features in X and corresponding with the output of ``transform``).\n This does not include categories that weren't seen during ``fit``.\n\nSee Also\n--------\nOneHotEncoder : Performs a one-hot encoding of categorical features.\nLabelEncoder : Encodes target labels with values between 0 and\n ``n_classes-1``.\n\nExamples\n--------\nGiven a dataset with two features, we let the encoder find the unique\nvalues per feature and transform the data to an ordinal encoding.\n\n>>> from sklearn.preprocessing import OrdinalEncoder\n>>> enc = OrdinalEncoder()\n>>> X = [['Male', 1], ['Female', 3], ['Female', 2]]\n>>> enc.fit(X)\nOrdinalEncoder()\n>>> enc.categories_\n[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n>>> enc.transform([['Female', 3], ['Male', 1]])\narray([[0., 2.],\n [1., 0.]])\n\n>>> enc.inverse_transform([[1, 0], [0, 1]])\narray([['Male', 1],\n ['Female', 2]], dtype=object)" - } - ], - "functions": [] - }, - { - "name": "sklearn.preprocessing._function_transformer", - "imports": [ - "import warnings", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils.validation import _allclose_dense_sparse", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "FunctionTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "func", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The callable to use for the transformation. This will be passed the same arguments as transform, with args and kwargs forwarded. If func is None, then func will be the identity function." 
- }, - { - "name": "inverse_func", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The callable to use for the inverse transformation. This will be passed the same arguments as inverse transform, with args and kwargs forwarded. If inverse_func is None, then inverse_func will be the identity function." - }, - { - "name": "validate", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Indicate that the input X array should be checked before calling ``func``. The possibilities are: - If False, there is no input validation. - If True, then X will be converted to a 2-dimensional NumPy array or sparse matrix. If the conversion is not possible an exception is raised. .. versionchanged:: 0.22 The default of ``validate`` changed from True to False." - }, - { - "name": "accept_sparse", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Indicate that func accepts a sparse matrix as input. If validate is False, this has no effect. Otherwise, if accept_sparse is false, sparse matrix inputs will cause an exception to be raised." - }, - { - "name": "check_inverse", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to check that or ``func`` followed by ``inverse_func`` leads to the original inputs. It can be used for a sanity check, raising a warning when the condition is not fulfilled. .. versionadded:: 0.20" - }, - { - "name": "kw_args", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary of additional keyword arguments to pass to func. .. 
versionadded:: 0.18" - }, - { - "name": "inv_kw_args", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary of additional keyword arguments to pass to inverse_func. .. versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that func and inverse_func are the inverse." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input array." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit transformer by checking X.\n\nIf ``validate`` is ``True``, ``X`` will be checked.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Input array.\n\nReturns\n-------\nself" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input array." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X using the forward function.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Input array.\n\nReturns\n-------\nX_out : array-like, shape (n_samples, n_features)\n Transformed input." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input array." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X using the inverse function.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Input array.\n\nReturns\n-------\nX_out : array-like, shape (n_samples, n_features)\n Transformed input." - }, - { - "name": "_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Constructs a transformer from an arbitrary callable.\n\nA FunctionTransformer forwards its X (and optionally y) arguments to a\nuser-defined function or function object and returns the result of this\nfunction. This is useful for stateless transformations such as taking the\nlog of frequencies, doing custom scaling, etc.\n\nNote: If a lambda is used as the function, then the resulting\ntransformer will not be pickleable.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfunc : callable, default=None\n The callable to use for the transformation. This will be passed\n the same arguments as transform, with args and kwargs forwarded.\n If func is None, then func will be the identity function.\n\ninverse_func : callable, default=None\n The callable to use for the inverse transformation. This will be\n passed the same arguments as inverse transform, with args and\n kwargs forwarded. If inverse_func is None, then inverse_func\n will be the identity function.\n\nvalidate : bool, default=False\n Indicate that the input X array should be checked before calling\n ``func``. The possibilities are:\n\n - If False, there is no input validation.\n - If True, then X will be converted to a 2-dimensional NumPy array or\n sparse matrix. If the conversion is not possible an exception is\n raised.\n\n .. 
versionchanged:: 0.22\n The default of ``validate`` changed from True to False.\n\naccept_sparse : bool, default=False\n Indicate that func accepts a sparse matrix as input. If validate is\n False, this has no effect. Otherwise, if accept_sparse is false,\n sparse matrix inputs will cause an exception to be raised.\n\ncheck_inverse : bool, default=True\n Whether to check that or ``func`` followed by ``inverse_func`` leads to\n the original inputs. It can be used for a sanity check, raising a\n warning when the condition is not fulfilled.\n\n .. versionadded:: 0.20\n\nkw_args : dict, default=None\n Dictionary of additional keyword arguments to pass to func.\n\n .. versionadded:: 0.18\n\ninv_kw_args : dict, default=None\n Dictionary of additional keyword arguments to pass to inverse_func.\n\n .. versionadded:: 0.18\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import FunctionTransformer\n>>> transformer = FunctionTransformer(np.log1p)\n>>> X = np.array([[0, 1], [2, 3]])\n>>> transformer.transform(X)\narray([[0. 
, 0.6931...],\n [1.0986..., 1.3862...]])" - } - ], - "functions": [ - { - "name": "_identity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The identity function.\n " - } - ] - }, - { - "name": "sklearn.preprocessing._label", - "imports": [ - "from collections import defaultdict", - "import itertools", - "import array", - "import warnings", - "import numpy as np", - "import scipy.sparse as sp", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils.sparsefuncs import min_max_axis", - "from utils import column_or_1d", - "from utils.validation import check_array", - "from utils.validation import check_is_fitted", - "from utils.validation import _num_samples", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import unique_labels", - "from utils.multiclass import type_of_target", - "from utils._encode import _encode", - "from utils._encode import _unique" - ], - "classes": [ - { - "name": "LabelEncoder", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit label encoder.\n\nParameters\n----------\ny : array-like of shape (n_samples,)\n Target values.\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit label encoder and return encoded labels.\n\nParameters\n----------\ny : array-like of shape (n_samples,)\n Target values.\n\nReturns\n-------\ny : array-like of shape (n_samples,)" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform labels to normalized encoding.\n\nParameters\n----------\ny : array-like of shape (n_samples,)\n Target values.\n\nReturns\n-------\ny : array-like of shape (n_samples,)" - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform labels back to original encoding.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n Target values.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Encode target labels with value between 0 and n_classes-1.\n\nThis transformer should be used to encode target values, *i.e.* `y`, and\nnot the input `X`.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.12\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n Holds the label for each class.\n\nExamples\n--------\n`LabelEncoder` can be used to normalize labels.\n\n>>> from sklearn import preprocessing\n>>> le = preprocessing.LabelEncoder()\n>>> le.fit([1, 2, 2, 6])\nLabelEncoder()\n>>> le.classes_\narray([1, 2, 6])\n>>> le.transform([1, 1, 2, 6])\narray([0, 0, 1, 2]...)\n>>> le.inverse_transform([0, 0, 1, 2])\narray([1, 1, 2, 6])\n\nIt can also be used to transform non-numerical labels (as long as they are\nhashable and comparable) to numerical labels.\n\n>>> le = preprocessing.LabelEncoder()\n>>> le.fit([\"paris\", \"paris\", \"tokyo\", \"amsterdam\"])\nLabelEncoder()\n>>> list(le.classes_)\n['amsterdam', 'paris', 'tokyo']\n>>> le.transform([\"tokyo\", \"tokyo\", \"paris\"])\narray([2, 2, 1]...)\n>>> list(le.inverse_transform([2, 2, 1]))\n['tokyo', 'tokyo', 'paris']\n\nSee Also\n--------\nOrdinalEncoder : Encode categorical features using an ordinal encoding\n scheme.\nOneHotEncoder : Encode categorical features as a one-hot numeric array." - }, - { - "name": "LabelBinarizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "neg_label", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Value with which negative labels must be encoded." - }, - { - "name": "pos_label", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Value with which positive labels must be encoded." - }, - { - "name": "sparse_output", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "True if the returned array from transform is desired to be in sparse CSR format." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. The 2-d matrix should only contain 0 and 1, represents multilabel classification." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit label binarizer.\n\nParameters\n----------\ny : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Target values. The 2-d matrix should only contain 0 and 1,\n represents multilabel classification.\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. The 2-d matrix should only contain 0 and 1, represents multilabel classification. Sparse matrix can be CSR, CSC, COO, DOK, or LIL." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit label binarizer and transform multi-class labels to binary\nlabels.\n\nThe output of transform is sometimes referred to as\nthe 1-of-K coding scheme.\n\nParameters\n----------\ny : {ndarray, sparse matrix} of shape (n_samples,) or (n_samples, n_classes)\n Target values. The 2-d matrix should only contain 0 and 1,\n represents multilabel classification. Sparse matrix can be\n CSR, CSC, COO, DOK, or LIL.\n\nReturns\n-------\nY : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Shape will be (n_samples, 1) for binary problems. Sparse matrix\n will be of CSR format." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. 
The 2-d matrix should only contain 0 and 1, represents multilabel classification. Sparse matrix can be CSR, CSC, COO, DOK, or LIL." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform multi-class labels to binary labels.\n\nThe output of transform is sometimes referred to by some authors as\nthe 1-of-K coding scheme.\n\nParameters\n----------\ny : {array, sparse matrix} of shape (n_samples,) or (n_samples, n_classes)\n Target values. The 2-d matrix should only contain 0 and 1,\n represents multilabel classification. Sparse matrix can be\n CSR, CSC, COO, DOK, or LIL.\n\nReturns\n-------\nY : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Shape will be (n_samples, 1) for binary problems. Sparse matrix\n will be of CSR format." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "Y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. All sparse matrices are converted to CSR before inverse transformation." - }, - { - "name": "threshold", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold used in the binary and multi-label cases. Use 0 when ``Y`` contains the output of decision_function (classifier). Use 0.5 when ``Y`` contains the output of predict_proba. If None, the threshold is assumed to be half way between neg_label and pos_label." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform binary labels back to multi-class labels.\n\nParameters\n----------\nY : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Target values. 
All sparse matrices are converted to CSR before\n inverse transformation.\n\nthreshold : float, default=None\n Threshold used in the binary and multi-label cases.\n\n Use 0 when ``Y`` contains the output of decision_function\n (classifier).\n Use 0.5 when ``Y`` contains the output of predict_proba.\n\n If None, the threshold is assumed to be half way between\n neg_label and pos_label.\n\nReturns\n-------\ny : {ndarray, sparse matrix} of shape (n_samples,)\n Target values. Sparse matrix will be of CSR format.\n\nNotes\n-----\nIn the case when the binary labels are fractional\n(probabilistic), inverse_transform chooses the class with the\ngreatest value. Typically, this allows to use the output of a\nlinear model's decision_function method directly as the input\nof inverse_transform." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Binarize labels in a one-vs-all fashion.\n\nSeveral regression and binary classification algorithms are\navailable in scikit-learn. A simple way to extend these algorithms\nto the multi-class classification case is to use the so-called\none-vs-all scheme.\n\nAt learning time, this simply consists in learning one regressor\nor binary classifier per class. In doing so, one needs to convert\nmulti-class labels to binary labels (belong or does not belong\nto the class). LabelBinarizer makes this process easy with the\ntransform method.\n\nAt prediction time, one assigns the class for which the corresponding\nmodel gave the greatest confidence. 
LabelBinarizer makes this easy\nwith the inverse_transform method.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nneg_label : int, default=0\n Value with which negative labels must be encoded.\n\npos_label : int, default=1\n Value with which positive labels must be encoded.\n\nsparse_output : bool, default=False\n True if the returned array from transform is desired to be in sparse\n CSR format.\n\nAttributes\n----------\n\nclasses_ : ndarray of shape (n_classes,)\n Holds the label for each class.\n\ny_type_ : str\n Represents the type of the target data as evaluated by\n utils.multiclass.type_of_target. Possible type are 'continuous',\n 'continuous-multioutput', 'binary', 'multiclass',\n 'multiclass-multioutput', 'multilabel-indicator', and 'unknown'.\n\nsparse_input_ : bool\n True if the input data to transform is given as a sparse matrix, False\n otherwise.\n\nExamples\n--------\n>>> from sklearn import preprocessing\n>>> lb = preprocessing.LabelBinarizer()\n>>> lb.fit([1, 2, 6, 4, 2])\nLabelBinarizer()\n>>> lb.classes_\narray([1, 2, 4, 6])\n>>> lb.transform([1, 6])\narray([[1, 0, 0, 0],\n [0, 0, 0, 1]])\n\nBinary targets transform to a column vector\n\n>>> lb = preprocessing.LabelBinarizer()\n>>> lb.fit_transform(['yes', 'no', 'no', 'yes'])\narray([[1],\n [0],\n [0],\n [1]])\n\nPassing a 2D matrix for multilabel classification\n\n>>> import numpy as np\n>>> lb.fit(np.array([[0, 1, 1], [1, 0, 0]]))\nLabelBinarizer()\n>>> lb.classes_\narray([0, 1, 2])\n>>> lb.transform([0, 1, 2, 1])\narray([[1, 0, 0],\n [0, 1, 0],\n [0, 0, 1],\n [0, 1, 0]])\n\nSee Also\n--------\nlabel_binarize : Function to perform the transform operation of\n LabelBinarizer with fixed classes.\nOneHotEncoder : Encode categorical features using a one-hot aka one-of-K\n scheme." 
- }, - { - "name": "MultiLabelBinarizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "classes", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates an ordering for the class labels. All entries should be unique (cannot contain duplicate classes)." - }, - { - "name": "sparse_output", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Set to True if output binary array is desired in CSR sparse format." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A set of labels (any orderable and hashable object) for each sample. If the `classes` parameter is set, `y` will not be iterated." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the label sets binarizer, storing :term:`classes_`.\n\nParameters\n----------\ny : iterable of iterables\n A set of labels (any orderable and hashable object) for each\n sample. If the `classes` parameter is set, `y` will not be\n iterated.\n\nReturns\n-------\nself : returns this MultiLabelBinarizer instance" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A set of labels (any orderable and hashable object) for each sample. If the `classes` parameter is set, `y` will not be iterated." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the label sets binarizer and transform the given label sets.\n\nParameters\n----------\ny : iterable of iterables\n A set of labels (any orderable and hashable object) for each\n sample. If the `classes` parameter is set, `y` will not be\n iterated.\n\nReturns\n-------\ny_indicator : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n A matrix such that `y_indicator[i, j] = 1` i.f.f. `classes_[j]`\n is in `y[i]`, and 0 otherwise. Sparse matrix will be of CSR\n format." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A set of labels (any orderable and hashable object) for each sample. If the `classes` parameter is set, `y` will not be iterated." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform the given label sets.\n\nParameters\n----------\ny : iterable of iterables\n A set of labels (any orderable and hashable object) for each\n sample. If the `classes` parameter is set, `y` will not be\n iterated.\n\nReturns\n-------\ny_indicator : array or CSR matrix, shape (n_samples, n_classes)\n A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]` is in\n `y[i]`, and 0 otherwise." - }, - { - "name": "_build_cache", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_transform", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "class_mapping", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maps from label to column index in label indicator matrix." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transforms the label sets with a given mapping\n\nParameters\n----------\ny : iterable of iterables\nclass_mapping : Mapping\n Maps from label to column index in label indicator matrix.\n\nReturns\n-------\ny_indicator : sparse matrix of shape (n_samples, n_classes)\n Label indicator matrix. Will be of CSR format." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "yt", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A matrix containing only 1s ands 0s." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform the given indicator matrix into label sets.\n\nParameters\n----------\nyt : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n A matrix containing only 1s ands 0s.\n\nReturns\n-------\ny : list of tuples\n The set of labels for each sample such that `y[i]` consists of\n `classes_[j]` for each `yt[i, j] == 1`." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transform between iterable of iterables and a multilabel format.\n\nAlthough a list of sets or tuples is a very intuitive format for multilabel\ndata, it is unwieldy to process. 
This transformer converts between this\nintuitive format and the supported multilabel format: a (samples x classes)\nbinary matrix indicating the presence of a class label.\n\nParameters\n----------\nclasses : array-like of shape (n_classes,), default=None\n Indicates an ordering for the class labels.\n All entries should be unique (cannot contain duplicate classes).\n\nsparse_output : bool, default=False\n Set to True if output binary array is desired in CSR sparse format.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n A copy of the `classes` parameter when provided.\n Otherwise it corresponds to the sorted set of classes found\n when fitting.\n\nExamples\n--------\n>>> from sklearn.preprocessing import MultiLabelBinarizer\n>>> mlb = MultiLabelBinarizer()\n>>> mlb.fit_transform([(1, 2), (3,)])\narray([[1, 1, 0],\n [0, 0, 1]])\n>>> mlb.classes_\narray([1, 2, 3])\n\n>>> mlb.fit_transform([{'sci-fi', 'thriller'}, {'comedy'}])\narray([[0, 1, 1],\n [1, 0, 0]])\n>>> list(mlb.classes_)\n['comedy', 'sci-fi', 'thriller']\n\nA common mistake is to pass in a list, which leads to the following issue:\n\n>>> mlb = MultiLabelBinarizer()\n>>> mlb.fit(['sci-fi', 'thriller', 'comedy'])\nMultiLabelBinarizer()\n>>> mlb.classes_\narray(['-', 'c', 'd', 'e', 'f', 'h', 'i', 'l', 'm', 'o', 'r', 's', 't',\n 'y'], dtype=object)\n\nTo correct this, the list of labels should be passed in as:\n\n>>> mlb = MultiLabelBinarizer()\n>>> mlb.fit([['sci-fi', 'thriller', 'comedy']])\nMultiLabelBinarizer()\n>>> mlb.classes_\narray(['comedy', 'sci-fi', 'thriller'], dtype=object)\n\nSee Also\n--------\nOneHotEncoder : Encode categorical features using a one-hot aka one-of-K\n scheme." - } - ], - "functions": [ - { - "name": "label_binarize", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sequence of integer labels or multilabel data to encode." 
- }, - { - "name": "classes", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Uniquely holds the label for each class." - }, - { - "name": "neg_label", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Value with which negative labels must be encoded." - }, - { - "name": "pos_label", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Value with which positive labels must be encoded." - }, - { - "name": "sparse_output", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Set to true if output binary array is desired in CSR sparse format." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Binarize labels in a one-vs-all fashion.\n\nSeveral regression and binary classification algorithms are\navailable in scikit-learn. A simple way to extend these algorithms\nto the multi-class classification case is to use the so-called\none-vs-all scheme.\n\nThis function makes it possible to compute this transformation for a\nfixed set of class labels known ahead of time.\n\nParameters\n----------\ny : array-like\n Sequence of integer labels or multilabel data to encode.\n\nclasses : array-like of shape (n_classes,)\n Uniquely holds the label for each class.\n\nneg_label : int, default=0\n Value with which negative labels must be encoded.\n\npos_label : int, default=1\n Value with which positive labels must be encoded.\n\nsparse_output : bool, default=False,\n Set to true if output binary array is desired in CSR sparse format.\n\nReturns\n-------\nY : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Shape will be (n_samples, 1) for binary problems. 
Sparse matrix will\n be of CSR format.\n\nExamples\n--------\n>>> from sklearn.preprocessing import label_binarize\n>>> label_binarize([1, 6], classes=[1, 2, 4, 6])\narray([[1, 0, 0, 0],\n [0, 0, 0, 1]])\n\nThe class ordering is preserved:\n\n>>> label_binarize([1, 6], classes=[1, 6, 4, 2])\narray([[1, 0, 0, 0],\n [0, 1, 0, 0]])\n\nBinary targets transform to a column vector\n\n>>> label_binarize(['yes', 'no', 'no', 'yes'], classes=['no', 'yes'])\narray([[1],\n [0],\n [0],\n [1]])\n\nSee Also\n--------\nLabelBinarizer : Class used to wrap the functionality of label_binarize and\n allow for fitting to classes independently of the transform operation." - }, - { - "name": "_inverse_binarize_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inverse label binarization transformation for multiclass.\n\nMulticlass uses the maximal score instead of a threshold." - }, - { - "name": "_inverse_binarize_thresholding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inverse label binarization transformation using thresholding." 
- } - ] - }, - { - "name": "sklearn.preprocessing", - "imports": [ - "from _function_transformer import FunctionTransformer", - "from _data import Binarizer", - "from _data import KernelCenterer", - "from _data import MinMaxScaler", - "from _data import MaxAbsScaler", - "from _data import Normalizer", - "from _data import RobustScaler", - "from _data import StandardScaler", - "from _data import QuantileTransformer", - "from _data import add_dummy_feature", - "from _data import binarize", - "from _data import normalize", - "from _data import scale", - "from _data import robust_scale", - "from _data import maxabs_scale", - "from _data import minmax_scale", - "from _data import quantile_transform", - "from _data import power_transform", - "from _data import PowerTransformer", - "from _data import PolynomialFeatures", - "from _encoders import OneHotEncoder", - "from _encoders import OrdinalEncoder", - "from _label import label_binarize", - "from _label import LabelBinarizer", - "from _label import LabelEncoder", - "from _label import MultiLabelBinarizer", - "from _discretization import KBinsDiscretizer" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.preprocessing.tests.test_common", - "imports": [ - "import warnings", - "import pytest", - "import numpy as np", - "from scipy import sparse", - "from sklearn.datasets import load_iris", - "from sklearn.model_selection import train_test_split", - "from sklearn.base import clone", - "from sklearn.preprocessing import maxabs_scale", - "from sklearn.preprocessing import minmax_scale", - "from sklearn.preprocessing import scale", - "from sklearn.preprocessing import power_transform", - "from sklearn.preprocessing import quantile_transform", - "from sklearn.preprocessing import robust_scale", - "from sklearn.preprocessing import MaxAbsScaler", - "from sklearn.preprocessing import MinMaxScaler", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.preprocessing import PowerTransformer", - 
"from sklearn.preprocessing import QuantileTransformer", - "from sklearn.preprocessing import RobustScaler", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose" - ], - "classes": [], - "functions": [ - { - "name": "_get_valid_samples_by_column", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get non NaN samples in column of X" - }, - { - "name": "test_missing_value_handling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_value_pandas_na_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.preprocessing.tests.test_data", - "imports": [ - "import warnings", - "import itertools", - "import numpy as np", - "import numpy.linalg as la", - "from scipy import sparse", - "from scipy import stats", - "from scipy.sparse import random as sparse_random", - "import pytest", - "from sklearn.utils import gen_batches", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_less", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import skip_if_32bit", - "from sklearn.utils._testing import _convert_container", - "from sklearn.utils.sparsefuncs import mean_variance_axis", - "from sklearn.preprocessing._data import _handle_zeros_in_scale", - "from sklearn.preprocessing._data import Binarizer", - "from sklearn.preprocessing._data import KernelCenterer", - "from 
sklearn.preprocessing._data import Normalizer", - "from sklearn.preprocessing._data import normalize", - "from sklearn.preprocessing._data import StandardScaler", - "from sklearn.preprocessing._data import scale", - "from sklearn.preprocessing._data import MinMaxScaler", - "from sklearn.preprocessing._data import minmax_scale", - "from sklearn.preprocessing._data import QuantileTransformer", - "from sklearn.preprocessing._data import quantile_transform", - "from sklearn.preprocessing._data import MaxAbsScaler", - "from sklearn.preprocessing._data import maxabs_scale", - "from sklearn.preprocessing._data import RobustScaler", - "from sklearn.preprocessing._data import robust_scale", - "from sklearn.preprocessing._data import add_dummy_feature", - "from sklearn.preprocessing._data import PolynomialFeatures", - "from sklearn.preprocessing._data import PowerTransformer", - "from sklearn.preprocessing._data import power_transform", - "from sklearn.preprocessing._data import BOUNDS_THRESHOLD", - "from sklearn.exceptions import NotFittedError", - "from sklearn.base import clone", - "from sklearn.pipeline import Pipeline", - "from sklearn.model_selection import cross_val_predict", - "from sklearn.svm import SVR", - "from sklearn.utils import shuffle", - "from sklearn import datasets" - ], - "classes": [], - "functions": [ - { - "name": "toarray", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_dim_1axis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_correct_incr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_feature_names", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_feature_array_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_features_csc_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_features_csr_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_features_csr_X_floats", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_features_csr_X_zero_row", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_features_csr_X_degree_4", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_features_csr_X_dim_edges", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises_value_error_if_sample_weights_greater_than_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_standard_scaler_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_standard_scaler_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_standard_scaler_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scale_1d", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_standard_scaler_numerical_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaler_2d_arrays", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaler_float16_overflow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_handle_zeros_in_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minmax_scaler_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_standard_scaler_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_standard_scaler_partial_fit_numerical_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_standard_scaler_trasform_with_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_max_scaler_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_max_scaler_zero_variance_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minmax_scale_axis1", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_max_scaler_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaler_without_centering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaler_n_samples_seen_with_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_identity_scalers_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaler_return_identity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaler_int", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaler_without_copy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scale_sparse_with_mean_raise_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scale_input_finiteness_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_error_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_col_zero_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_robust_scaler_2d_arrays", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_equivalence_dense_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_transform_one_row_csr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_iris_quantiles", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_check_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_sparse_ignore_zeros", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_dense_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_subsampling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_sparse_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_axis1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_quantile_transform_bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_and_inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transformer_sorted_quantiles", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_invalid_range", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scale_function_without_centering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scale_axis1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scale_1d_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_zero_variance_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_unit_variance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_maxabs_scaler_zero_variance_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_maxabs_scaler_large_negative_value", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_maxabs_scaler_transform_one_row_csr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_maxabs_scaler_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_maxabs_scaler_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normalizer_l1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normalizer_l2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normalizer_max", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normalizer_max_sign", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_binarizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_center_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cv_pipeline_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", 
- "docstring": null - }, - { - "name": "test_add_dummy_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_add_dummy_feature_coo", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_add_dummy_feature_csc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_add_dummy_feature_csr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_cold_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_valid_axis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_notfitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_boxcox_strictly_positive_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_yeojohnson_any_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_power_transformer_shape_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_method_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_lambda_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_lambda_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_optimization_power_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_yeo_johnson_darwin_example", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_nans", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_copy_True", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_copy_False", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_standard_scaler_sparse_partial_fit_finite_variance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minmax_scaler_clip", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - 
"name": "sklearn.preprocessing.tests.test_discretization", - "imports": [ - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "import warnings", - "from sklearn.preprocessing import KBinsDiscretizer", - "from sklearn.preprocessing import OneHotEncoder", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_allclose_dense_sparse" - ], - "classes": [], - "functions": [ - { - "name": "test_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_valid_n_bins", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_n_bins", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_n_bins_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_transform_n_bins_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_same_min_max", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_1d_behavior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_numeric_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_encode_option", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_encode_options", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_strategy_option", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nonuniform_strategies", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_outside_fit_range", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_overwrite", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_redundant_bins", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_percentile_numeric_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_consistent_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_32_equal_64", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.preprocessing.tests.test_encoders", - "imports": [ - "import re", - "import numpy as np", - "from scipy import sparse", - "import pytest", - "from sklearn.exceptions import NotFittedError", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import _convert_container", - "from sklearn.utils import is_scalar_nan", - "from sklearn.preprocessing import 
OneHotEncoder", - "from sklearn.preprocessing import OrdinalEncoder" - ], - "classes": [], - "functions": [ - { - "name": "test_one_hot_encoder_sparse_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_diff_n_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_handle_unknown", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_not_fitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_handle_unknown_strings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_dtype_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_feature_names_unicode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_categorical_onehot", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_inverse_transform_raise_error_with_unknown", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that `inverse_transform` raise an error with unknown samples, no\ndropped feature, and `handle_unknow=\"error`.\nNon-regression test for:\nhttps://github.com/scikit-learn/scikit-learn/issues/14934" - }, - { - "name": "test_one_hot_encoder_inverse_if_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_drop_reset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_X_is_not_1D", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_X_is_not_1D_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_specified_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_unsorted_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_specified_categories_mixed_columns", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_one_hot_encoder_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_feature_names_drop", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_drop_equals_if_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_specified_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_raise_missing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_handle_unknowns_string", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_handle_unknowns_numeric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_handle_unknowns_raise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_handle_unknowns_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_handle_unknowns_nan_non_float_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_raise_categories_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_encoder_dtypes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_encoder_dtypes_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_drop_manual", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_invalid_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_drop_length", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_encoders_has_categorical_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_encoders_unicode_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that encoding work with string and object dtypes.\nNon-regression test for:\nhttps://github.com/scikit-learn/scikit-learn/issues/15616\nhttps://github.com/scikit-learn/scikit-learn/issues/15726" - }, - { - "name": "test_ohe_missing_values_get_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_ohe_missing_value_support_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ohe_missing_value_support_pandas_categorical", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.preprocessing.tests.test_function_transformer", - "imports": [ - "import pytest", - "import numpy as np", - "from scipy import sparse", - "from sklearn.preprocessing import FunctionTransformer", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_no_warnings" - ], - "classes": [], - "functions": [ - { - "name": "_make_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_delegate_to_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_np_log", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kw_arg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kw_arg_update", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kw_arg_reset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_inverse", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_function_transformer_frame", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.preprocessing.tests.test_label", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy.sparse import issparse", - "from scipy.sparse import coo_matrix", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import dok_matrix", - "from scipy.sparse import lil_matrix", - "from sklearn.utils.multiclass import type_of_target", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils import _to_object_array", - "from sklearn.preprocessing._label import LabelBinarizer", - "from sklearn.preprocessing._label import MultiLabelBinarizer", - "from sklearn.preprocessing._label import LabelEncoder", - "from sklearn.preprocessing._label import label_binarize", - "from sklearn.preprocessing._label import _inverse_binarize_thresholding", - "from sklearn.preprocessing._label import _inverse_binarize_multiclass", - "from sklearn import datasets" - ], - "classes": [], - "functions": [ - { - "name": "toarray", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_binarizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_binarizer_unseen_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_binarizer_set_label_encoding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_label_binarizer_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_encoder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_encoder_negative_ints", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_encoder_str_bad_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_encoder_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_encoder_empty_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_output_multilabel_binarizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_binarizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_binarizer_empty_sample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_binarizer_unknown_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_binarizer_given_classes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_binarizer_multiple_calls", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_multilabel_binarizer_same_length_sequence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_binarizer_non_integer_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_binarizer_non_unique", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_binarizer_inverse_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_binarize_with_class_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_binarized_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_binarize_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_binarize_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_binarize_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_input_label_binarize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inverse_binarize_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.preprocessing.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.semi_supervised._label_propagation", - 
"imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "import warnings", - "import numpy as np", - "from scipy import sparse", - "from scipy.sparse import csgraph", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from metrics.pairwise import rbf_kernel", - "from neighbors import NearestNeighbors", - "from utils.extmath import safe_sparse_dot", - "from utils.multiclass import check_classification_targets", - "from utils.validation import check_is_fitted", - "from utils.validation import check_array", - "from utils.validation import _deprecate_positional_args", - "from exceptions import ConvergenceWarning" - ], - "classes": [ - { - "name": "BaseLabelPropagation", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": "Literal['knn', 'rbf']", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": " String identifier for kernel function to use or the kernel function itself. Only 'rbf' and 'knn' strings are valid inputs. The function passed should take two inputs, each of shape (n_samples, n_features), and return a (n_samples, n_samples) shaped weight matrix. gamma : float, default=20 Parameter for rbf kernel. n_neighbors : int, default=7 Parameter for knn kernel. Need to be strictly positive. alpha : float, default=1.0 Clamping factor. max_iter : int, default=30 Change maximum number of iterations allowed. tol : float, default=1e-3 Convergence tolerance: threshold to consider the system at steady state." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_build_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Performs inductive inference across the model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n Predictions for input data." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict probability for each possible outcome.\n\nCompute the probability estimates for each single sample in X\nand each possible outcome seen during training (categorical\ndistribution).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\nprobabilities : ndarray of shape (n_samples, n_classes)\n Normalized probability distributions across\n class labels." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A matrix of shape (n_samples, n_samples) will be created from this." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "`n_labeled_samples` (unlabeled points are marked as -1) All unlabeled samples will be transductively assigned labels." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit a semi-supervised label propagation model based\n\nAll the input data is provided matrix X (labeled and unlabeled)\nand corresponding label matrix y with a dedicated marker value for\nunlabeled samples.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n A matrix of shape (n_samples, n_samples) will be created from this.\n\ny : array-like of shape (n_samples,)\n `n_labeled_samples` (unlabeled points are marked as -1)\n All unlabeled samples will be transductively assigned labels.\n\nReturns\n-------\nself : object" - } - ], - "docstring": "Base class for label propagation module.\n\n Parameters\n ----------\n kernel : {'knn', 'rbf'} or callable, default='rbf'\n String identifier for kernel function to use or the kernel function\n itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n passed should take two inputs, each of shape (n_samples, n_features),\n and return a (n_samples, n_samples) shaped weight matrix.\n\n gamma : float, default=20\n Parameter for rbf kernel.\n\n n_neighbors : int, default=7\n Parameter for knn kernel. Need to be strictly positive.\n\n alpha : float, default=1.0\n Clamping factor.\n\n max_iter : int, default=30\n Change maximum number of iterations allowed.\n\n tol : float, default=1e-3\n Convergence tolerance: threshold to consider the system at steady\n state.\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n " - }, - { - "name": "LabelPropagation", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": "Literal['knn', 'rbf']", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "String identifier for kernel function to use or the kernel function itself. Only 'rbf' and 'knn' strings are valid inputs. The function passed should take two inputs, each of shape (n_samples, n_features), and return a (n_samples, n_samples) shaped weight matrix." - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "20", - "limitation": null, - "ignored": false, - "docstring": "Parameter for rbf kernel." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "7", - "limitation": null, - "ignored": false, - "docstring": "Parameter for knn kernel which need to be strictly positive." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Change maximum number of iterations allowed." - }, - { - "name": "tol", - "type": "Union[Literal[1e-3], float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Convergence tolerance: threshold to consider the system at steady state." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_build_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Matrix representing a fully connected graph between each sample\n\nThis basic implementation creates a non-stochastic affinity matrix, so\nclass distributions will exceed 1 (normalization may be desired)." - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Label Propagation classifier\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nkernel : {'knn', 'rbf'} or callable, default='rbf'\n String identifier for kernel function to use or the kernel function\n itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n passed should take two inputs, each of shape (n_samples, n_features),\n and return a (n_samples, n_samples) shaped weight matrix.\n\ngamma : float, default=20\n Parameter for rbf kernel.\n\nn_neighbors : int, default=7\n Parameter for knn kernel which need to be strictly positive.\n\nmax_iter : int, default=1000\n Change maximum number of iterations allowed.\n\ntol : float, 1e-3\n Convergence tolerance: threshold to consider the system at steady\n state.\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nX_ : ndarray of shape (n_samples, n_features)\n Input array.\n\nclasses_ : ndarray of shape (n_classes,)\n The distinct labels used in classifying instances.\n\nlabel_distributions_ : ndarray of shape (n_samples, n_classes)\n Categorical distribution for each item.\n\ntransduction_ : ndarray of shape (n_samples)\n Label assigned to each item via the transduction.\n\nn_iter_ : int\n Number of iterations run.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets\n>>> from sklearn.semi_supervised import LabelPropagation\n>>> label_prop_model = LabelPropagation()\n>>> iris = datasets.load_iris()\n>>> rng = np.random.RandomState(42)\n>>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3\n>>> labels = np.copy(iris.target)\n>>> labels[random_unlabeled_points] = -1\n>>> label_prop_model.fit(iris.data, labels)\nLabelPropagation(...)\n\nReferences\n----------\nXiaojin Zhu and Zoubin Ghahramani. Learning from labeled and unlabeled data\nwith label propagation. Technical Report CMU-CALD-02-107, Carnegie Mellon\nUniversity, 2002 http://pages.cs.wisc.edu/~jerryzhu/pub/CMU-CALD-02-107.pdf\n\nSee Also\n--------\nLabelSpreading : Alternate label propagation strategy more robust to noise." - }, - { - "name": "LabelSpreading", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": "Literal['knn', 'rbf']", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "String identifier for kernel function to use or the kernel function itself. Only 'rbf' and 'knn' strings are valid inputs. The function passed should take two inputs, each of shape (n_samples, n_features), and return a (n_samples, n_samples) shaped weight matrix." 
- }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "20", - "limitation": null, - "ignored": false, - "docstring": "Parameter for rbf kernel." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "7", - "limitation": null, - "ignored": false, - "docstring": "Parameter for knn kernel which is a strictly positive integer." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Clamping factor. A value in (0, 1) that specifies the relative amount that an instance should adopt the information from its neighbors as opposed to its initial label. alpha=0 means keeping the initial label information; alpha=1 means replacing all initial information." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations allowed." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Convergence tolerance: threshold to consider the system at steady state." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_build_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Graph matrix for Label Spreading computes the graph laplacian" - } - ], - "docstring": "LabelSpreading model for semi-supervised learning\n\nThis model is similar to the basic Label Propagation algorithm,\nbut uses affinity matrix based on the normalized graph Laplacian\nand soft clamping across the labels.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nkernel : {'knn', 'rbf'} or callable, default='rbf'\n String identifier for kernel function to use or the kernel function\n itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n passed should take two inputs, each of shape (n_samples, n_features),\n and return a (n_samples, n_samples) shaped weight matrix.\n\ngamma : float, default=20\n Parameter for rbf kernel.\n\nn_neighbors : int, default=7\n Parameter for knn kernel which is a strictly positive integer.\n\nalpha : float, default=0.2\n Clamping factor. A value in (0, 1) that specifies the relative amount\n that an instance should adopt the information from its neighbors as\n opposed to its initial label.\n alpha=0 means keeping the initial label information; alpha=1 means\n replacing all initial information.\n\nmax_iter : int, default=30\n Maximum number of iterations allowed.\n\ntol : float, default=1e-3\n Convergence tolerance: threshold to consider the system at steady\n state.\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nX_ : ndarray of shape (n_samples, n_features)\n Input array.\n\nclasses_ : ndarray of shape (n_classes,)\n The distinct labels used in classifying instances.\n\nlabel_distributions_ : ndarray of shape (n_samples, n_classes)\n Categorical distribution for each item.\n\ntransduction_ : ndarray of shape (n_samples,)\n Label assigned to each item via the transduction.\n\nn_iter_ : int\n Number of iterations run.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets\n>>> from sklearn.semi_supervised import LabelSpreading\n>>> label_prop_model = LabelSpreading()\n>>> iris = datasets.load_iris()\n>>> rng = np.random.RandomState(42)\n>>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3\n>>> labels = np.copy(iris.target)\n>>> labels[random_unlabeled_points] = -1\n>>> label_prop_model.fit(iris.data, labels)\nLabelSpreading(...)\n\nReferences\n----------\nDengyong Zhou, Olivier Bousquet, Thomas Navin Lal, Jason Weston,\nBernhard Schoelkopf. Learning with local and global consistency (2004)\nhttp://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.115.3219\n\nSee Also\n--------\nLabelPropagation : Unregularized graph based semi-supervised learning." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.semi_supervised._self_training", - "imports": [ - "import warnings", - "import numpy as np", - "from base import MetaEstimatorMixin", - "from base import clone", - "from base import BaseEstimator", - "from utils.validation import check_is_fitted", - "from utils.metaestimators import if_delegate_has_method", - "from utils import safe_mask" - ], - "classes": [ - { - "name": "SelfTrainingClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator object implementing ``fit`` and ``predict_proba``. Invoking the ``fit`` method will fit a clone of the passed estimator, which will be stored in the ``base_estimator_`` attribute." - }, - { - "name": "criterion", - "type": "Literal['threshold', 'k_best']", - "hasDefault": true, - "default": "'threshold'", - "limitation": null, - "ignored": false, - "docstring": "The selection criterion used to select which labels to add to the training set. If 'threshold', pseudo-labels with prediction probabilities above `threshold` are added to the dataset. If 'k_best', the `k_best` pseudo-labels with highest prediction probabilities are added to the dataset. When using the 'threshold' criterion, a :ref:`well calibrated classifier ` should be used." - }, - { - "name": "threshold", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The decision threshold for use with `criterion='threshold'`. Should be in [0, 1). When using the 'threshold' criterion, a :ref:`well calibrated classifier ` should be used." 
- }, - { - "name": "k_best", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The amount of samples to add in each iteration. Only used when `criterion` is k_best'." - }, - { - "name": "max_iter", - "type": "Optional[int]", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations allowed. Should be greater than or equal to 0. If it is ``None``, the classifier will continue to predict labels until no new pseudo-labels are added, or all unlabeled samples have been labeled." - }, - { - "name": "verbose: bool", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output." - }, - { - "name": "default=False", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array representing the data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array representing the labels. Unlabeled samples should have the label -1." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fits this ``SelfTrainingClassifier`` to a dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\ny : {array-like, sparse matrix} of shape (n_samples,)\n Array representing the labels. Unlabeled samples should have the\n label -1.\n\nReturns\n-------\nself : object\n Returns an instance of self." 
- }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array representing the data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the classes of X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n Array with predicted labels." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array representing the data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict probability for each possible outcome.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\nReturns\n-------\ny : ndarray of shape (n_samples, n_features)\n Array with prediction probabilities." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array representing the data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calls decision function of the `base_estimator`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\nReturns\n-------\ny : ndarray of shape (n_samples, n_features)\n Result of the decision function of the `base_estimator`." 
- }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array representing the data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict log probability for each possible outcome.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\nReturns\n-------\ny : ndarray of shape (n_samples, n_features)\n Array with log prediction probabilities." - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array representing the data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array representing the labels." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calls score on the `base_estimator`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\ny : array-like of shape (n_samples,)\n Array representing the labels.\n\nReturns\n-------\nscore : float\n Result of calling score on the `base_estimator`." - } - ], - "docstring": "Self-training classifier.\n\nThis class allows a given supervised classifier to function as a\nsemi-supervised classifier, allowing it to learn from unlabeled data. 
It\ndoes this by iteratively predicting pseudo-labels for the unlabeled data\nand adding them to the training set.\n\nThe classifier will continue iterating until either max_iter is reached, or\nno pseudo-labels were added to the training set in the previous iteration.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nbase_estimator : estimator object\n An estimator object implementing ``fit`` and ``predict_proba``.\n Invoking the ``fit`` method will fit a clone of the passed estimator,\n which will be stored in the ``base_estimator_`` attribute.\n\ncriterion : {'threshold', 'k_best'}, default='threshold'\n The selection criterion used to select which labels to add to the\n training set. If 'threshold', pseudo-labels with prediction\n probabilities above `threshold` are added to the dataset. If 'k_best',\n the `k_best` pseudo-labels with highest prediction probabilities are\n added to the dataset. When using the 'threshold' criterion, a\n :ref:`well calibrated classifier ` should be used.\n\nthreshold : float, default=0.75\n The decision threshold for use with `criterion='threshold'`.\n Should be in [0, 1). When using the 'threshold' criterion, a\n :ref:`well calibrated classifier ` should be used.\n\nk_best : int, default=10\n The amount of samples to add in each iteration. Only used when\n `criterion` is k_best'.\n\nmax_iter : int or None, default=10\n Maximum number of iterations allowed. Should be greater than or equal\n to 0. If it is ``None``, the classifier will continue to predict labels\n until no new pseudo-labels are added, or all unlabeled samples have\n been labeled.\n\nverbose: bool, default=False\n Enable verbose output.\n\nAttributes\n----------\nbase_estimator_ : estimator object\n The fitted estimator.\n\nclasses_ : ndarray or list of ndarray of shape (n_classes,)\n Class labels for each output. 
(Taken from the trained\n ``base_estimator_``).\n\ntransduction_ : ndarray of shape (n_samples,)\n The labels used for the final fit of the classifier, including\n pseudo-labels added during fit.\n\nlabeled_iter_ : ndarray of shape (n_samples,)\n The iteration in which each sample was labeled. When a sample has\n iteration 0, the sample was already labeled in the original dataset.\n When a sample has iteration -1, the sample was not labeled in any\n iteration.\n\nn_iter_ : int\n The number of rounds of self-training, that is the number of times the\n base estimator is fitted on relabeled variants of the training set.\n\ntermination_condition_ : {'max_iter', 'no_change', 'all_labeled'}\n The reason that fitting was stopped.\n\n - 'max_iter': `n_iter_` reached `max_iter`.\n - 'no_change': no new labels were predicted.\n - 'all_labeled': all unlabeled samples were labeled before `max_iter`\n was reached.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets\n>>> from sklearn.semi_supervised import SelfTrainingClassifier\n>>> from sklearn.svm import SVC\n>>> rng = np.random.RandomState(42)\n>>> iris = datasets.load_iris()\n>>> random_unlabeled_points = rng.rand(iris.target.shape[0]) < 0.3\n>>> iris.target[random_unlabeled_points] = -1\n>>> svc = SVC(probability=True, gamma=\"auto\")\n>>> self_training_model = SelfTrainingClassifier(svc)\n>>> self_training_model.fit(iris.data, iris.target)\nSelfTrainingClassifier(...)\n\nReferences\n----------\nDavid Yarowsky. 1995. Unsupervised word sense disambiguation rivaling\nsupervised methods. In Proceedings of the 33rd annual meeting on\nAssociation for Computational Linguistics (ACL '95). Association for\nComputational Linguistics, Stroudsburg, PA, USA, 189-196. 
DOI:\nhttps://doi.org/10.3115/981658.981684" - } - ], - "functions": [ - { - "name": "_validate_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure that an estimator implements the necessary methods." - } - ] - }, - { - "name": "sklearn.semi_supervised", - "imports": [ - "from _label_propagation import LabelPropagation", - "from _label_propagation import LabelSpreading", - "from _self_training import SelfTrainingClassifier" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.semi_supervised.tests.test_label_propagation", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy.sparse import issparse", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.semi_supervised import _label_propagation as label_propagation", - "from sklearn.metrics.pairwise import rbf_kernel", - "from sklearn.model_selection import train_test_split", - "from sklearn.neighbors import NearestNeighbors", - "from sklearn.datasets import make_classification", - "from sklearn.exceptions import ConvergenceWarning", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_fit_transduction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_distribution", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_spreading_closed_form", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_propagation_closed_form", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_valid_alpha", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_convergence_speed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_convergence_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_propagation_non_zero_normalizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_sparse_callable_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.semi_supervised.tests.test_self_training", - "imports": [ - "from math import ceil", - "import numpy as np", - "from numpy.testing import assert_array_equal", - "import pytest", - "from sklearn.ensemble import StackingClassifier", - "from sklearn.exceptions import NotFittedError", - "from sklearn.neighbors import KNeighborsClassifier", - "from sklearn.svm import SVC", - "from sklearn.model_selection import train_test_split", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_blobs", - "from sklearn.metrics import accuracy_score", - "from sklearn.semi_supervised import SelfTrainingClassifier" - ], - "classes": [], - "functions": [ - { - "name": "test_missing_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_none_classifier", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_params_selection_crit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warns_k_best", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_k_best", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sanity_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_none_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_iterations", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prefitted_throws_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_labeled_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_unlabeled", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_strings_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_verbose_k_best", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_k_best_selects_best", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_estimator_meta_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.semi_supervised.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.svm.setup", - "imports": [ - "import os", - "from os.path import join", - "import numpy", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.svm._base", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "import warnings", - "from abc import ABCMeta", - "from abc import abstractmethod", - "from None import _libsvm as libsvm", - "from None import _liblinear as liblinear", - "from None import _libsvm_sparse as libsvm_sparse", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from preprocessing import LabelEncoder", - "from utils.multiclass import _ovr_decision_function", - "from utils import check_array", - "from utils import check_random_state", - "from utils import column_or_1d", - "from utils import compute_class_weight", - "from utils.deprecation import deprecated", - "from utils.extmath import safe_sparse_dot", - "from utils.validation import check_is_fitted", - "from 
utils.validation import _check_large_sparse", - "from utils.validation import _num_samples", - "from utils.validation import _check_sample_weight", - "from utils.validation import check_consistent_length", - "from utils.multiclass import check_classification_targets", - "from exceptions import ConvergenceWarning", - "from exceptions import NotFittedError" - ], - "classes": [ - { - "name": "BaseLibSVM", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features. For kernel=\"precomputed\", the expected shape of X is (n_samples, n_samples)." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values (class labels in classification, real numbers in regression)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Per-sample weights. Rescale C per sample. Higher weights force the classifier to put more emphasis on these points." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the SVM model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples)\n Training vectors, where n_samples is the number of samples\n and n_features is the number of features.\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples, n_samples).\n\ny : array-like of shape (n_samples,)\n Target values (class labels in classification, real numbers in\n regression).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Per-sample weights. Rescale C per sample. Higher weights\n force the classifier to put more emphasis on these points.\n\nReturns\n-------\nself : object\n\nNotes\n-----\nIf X and y are not C-ordered and contiguous arrays of np.float64 and\nX is not a scipy.sparse.csr_matrix, X and/or y may be copied.\n\nIf X is a dense array, then the other methods will not support sparse\nmatrices as input." - }, - { - "name": "_validate_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validation of y and class_weight.\n\nDefault implementation for SVR and one-class; overridden in BaseSVC." 
- }, - { - "name": "_warn_from_fit_status", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_dense_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sparse_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "For kernel=\"precomputed\", the expected shape of X is (n_samples_test, n_samples_train)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform regression on samples in X.\n\nFor an one-class model, +1 (inlier) or -1 (outlier) is returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)" - }, - { - "name": "_dense_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sparse_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_compute_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the data transformed by a callable kernel" - }, - { - "name": "_decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluates the decision function for the samples 
in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nX : array-like of shape (n_samples, n_class * (n_class-1) / 2)\n Returns the decision function of the sample for each class\n in the model." - }, - { - "name": "_dense_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sparse_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_for_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "n_support_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for estimators that use libsvm as backing library.\n\nThis implements support vector machine classification and regression.\n\nParameter documentation is in the derived `SVC` class." 
- }, - { - "name": "BaseSVC", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluates the decision function for the samples in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_classes * (n_classes-1) / 2)\n Returns the decision function of the sample for each class\n in the model.\n If decision_function_shape='ovr', the shape is (n_samples,\n n_classes).\n\nNotes\n-----\nIf decision_function_shape='ovo', the function values are proportional\nto the distance of the samples X to the separating hyperplane. If the\nexact distances are required, divide the function values by the norm of\nthe weight vector (``coef_``). See also `this question\n`_ for further details.\nIf decision_function_shape='ovr', the decision function is a monotonic\ntransformation of ovo decision function." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "For kernel=\"precomputed\", the expected shape of X is (n_samples_test, n_samples_train)." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on samples in X.\n\nFor an one-class model, +1 or -1 is returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples_test, n_samples_train)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n Class labels for samples in X." - }, - { - "name": "_check_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "For kernel=\"precomputed\", the expected shape of X is (n_samples_test, n_samples_train)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute probabilities of possible outcomes for samples in X.\n\nThe model need to have probability information computed at training\ntime: fit with attribute `probability` set to True.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\nReturns\n-------\nT : ndarray of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.\n\nNotes\n-----\nThe probability model is created using cross validation, so\nthe results can be slightly different than those obtained by\npredict. Also, it will produce meaningless results on very small\ndatasets." 
- }, - { - "name": "_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "For kernel=\"precomputed\", the expected shape of X is (n_samples_test, n_samples_train)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute log probabilities of possible outcomes for samples in X.\n\nThe model need to have probability information computed at training\ntime: fit with attribute `probability` set to True.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or (n_samples_test, n_samples_train)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\nReturns\n-------\nT : ndarray of shape (n_samples, n_classes)\n Returns the log-probabilities of the sample for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.\n\nNotes\n-----\nThe probability model is created using cross validation, so\nthe results can be slightly different than those obtained by\npredict. Also, it will produce meaningless results on very small\ndatasets." 
- }, - { - "name": "_predict_log_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_dense_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sparse_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "probA_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "probB_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "ABC for LibSVM-based classifiers." - } - ], - "functions": [ - { - "name": "_one_vs_one_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate primal coefficients from dual coefficients\nfor the one-vs-one multi class LibSVM in the case\nof a linear kernel." - }, - { - "name": "_get_liblinear_solver_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the liblinear magic number for the solver.\n\nThis number depends on the values of the following attributes:\n - multi_class\n - penalty\n - loss\n - dual\n\nThe same number is also internally used by LibLinear to determine\nwhich solver to use." - }, - { - "name": "_fit_liblinear", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector relative to X" - }, - { - "name": "C", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Inverse of cross-validation parameter. Lower the C, the more the penalization." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to fit the intercept, that is to add a intercept term to the decision function." - }, - { - "name": "intercept_scaling", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "LibLinear internally penalizes the intercept and this term is subject to regularization just like the other terms of the feature vector. In order to avoid this, one should increase the intercept_scaling. such that the feature vector becomes [x, intercept_scaling]." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``" - }, - { - "name": "penalty", - "type": "Literal['l1', 'l2']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The norm of the penalty used in regularization." 
- }, - { - "name": "dual", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dual or primal formulation," - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Set verbose to any positive number for verbosity." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Stopping condition." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generation for shuffling the data. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "multi_class", - "type": "Literal['ovr', 'crammer_singer']", - "hasDefault": true, - "default": "'ovr'", - "limitation": null, - "ignored": false, - "docstring": "`ovr` trains n_classes one-vs-rest classifiers, while `crammer_singer` optimizes a joint objective over all classes. While `crammer_singer` is interesting from an theoretical perspective as it is consistent it is seldom used in practice and rarely leads to better accuracy and is more expensive to compute. If `crammer_singer` is chosen, the options loss, penalty and dual will be ignored." - }, - { - "name": "loss", - "type": "Literal['logistic_regression', 'hinge', 'squared_hinge', 'epsilon_insensitive']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The loss function used to fit the model." 
- }, - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Epsilon parameter in the epsilon-insensitive loss function. Note that the value of this parameter depends on the scale of the target variable y. If unsure, set epsilon=0." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights assigned to each sample." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Used by Logistic Regression (and CV) and LinearSVC/LinearSVR.\n\nPreprocessing is done in this function before supplying it to liblinear.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X\n\nC : float\n Inverse of cross-validation parameter. Lower the C, the more\n the penalization.\n\nfit_intercept : bool\n Whether or not to fit the intercept, that is to add a intercept\n term to the decision function.\n\nintercept_scaling : float\n LibLinear internally penalizes the intercept and this term is subject\n to regularization just like the other terms of the feature vector.\n In order to avoid this, one should increase the intercept_scaling.\n such that the feature vector becomes [x, intercept_scaling].\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one. 
For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\npenalty : {'l1', 'l2'}\n The norm of the penalty used in regularization.\n\ndual : bool\n Dual or primal formulation,\n\nverbose : int\n Set verbose to any positive number for verbosity.\n\nmax_iter : int\n Number of iterations.\n\ntol : float\n Stopping condition.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nmulti_class : {'ovr', 'crammer_singer'}, default='ovr'\n `ovr` trains n_classes one-vs-rest classifiers, while `crammer_singer`\n optimizes a joint objective over all classes.\n While `crammer_singer` is interesting from an theoretical perspective\n as it is consistent it is seldom used in practice and rarely leads to\n better accuracy and is more expensive to compute.\n If `crammer_singer` is chosen, the options loss, penalty and dual will\n be ignored.\n\nloss : {'logistic_regression', 'hinge', 'squared_hinge', 'epsilon_insensitive', 'squared_epsilon_insensitive}, default='logistic_regression'\n The loss function used to fit the model.\n\nepsilon : float, default=0.1\n Epsilon parameter in the epsilon-insensitive loss function. Note\n that the value of this parameter depends on the scale of the target\n variable y. 
If unsure, set epsilon=0.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights assigned to each sample.\n\nReturns\n-------\ncoef_ : ndarray of shape (n_features, n_features + 1)\n The coefficient vector got by minimizing the objective function.\n\nintercept_ : float\n The intercept term added to the vector.\n\nn_iter_ : int\n Maximum number of iterations run across all classes." - } - ] - }, - { - "name": "sklearn.svm._bounds", - "imports": [ - "import numpy as np", - "from preprocessing import LabelBinarizer", - "from utils.validation import check_consistent_length", - "from utils.validation import check_array", - "from utils.validation import _deprecate_positional_args", - "from utils.extmath import safe_sparse_dot" - ], - "classes": [], - "functions": [ - { - "name": "l1_min_c", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector relative to X." - }, - { - "name": "loss", - "type": "Literal['squared_hinge', 'log']", - "hasDefault": true, - "default": "'squared_hinge'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the loss function. With 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss). With 'log' it is the loss of logistic regression models." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specifies if the intercept should be fitted by the model. It must match the fit() method parameter." 
- }, - { - "name": "intercept_scaling", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "when fit_intercept is True, instance vector x becomes [x, intercept_scaling], i.e. a \"synthetic\" feature with constant value equals to intercept_scaling is appended to the instance vector. It must match the fit() method parameter." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the lowest bound for C such that for C in (l1_min_C, infinity)\nthe model is guaranteed not to be empty. This applies to l1 penalized\nclassifiers, such as LinearSVC with penalty='l1' and\nlinear_model.LogisticRegression with penalty='l1'.\n\nThis value is valid if class_weight parameter in fit() is not set.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X.\n\nloss : {'squared_hinge', 'log'}, default='squared_hinge'\n Specifies the loss function.\n With 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss).\n With 'log' it is the loss of logistic regression models.\n\nfit_intercept : bool, default=True\n Specifies if the intercept should be fitted by the model.\n It must match the fit() method parameter.\n\nintercept_scaling : float, default=1.0\n when fit_intercept is True, instance vector x becomes\n [x, intercept_scaling],\n i.e. 
a \"synthetic\" feature with constant value equals to\n intercept_scaling is appended to the instance vector.\n It must match the fit() method parameter.\n\nReturns\n-------\nl1_min_c : float\n minimum value for C" - } - ] - }, - { - "name": "sklearn.svm._classes", - "imports": [ - "import numpy as np", - "from _base import _fit_liblinear", - "from _base import BaseSVC", - "from _base import BaseLibSVM", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from base import OutlierMixin", - "from linear_model._base import LinearClassifierMixin", - "from linear_model._base import SparseCoefMixin", - "from linear_model._base import LinearModel", - "from utils.validation import _num_samples", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import check_classification_targets", - "from utils.deprecation import deprecated" - ], - "classes": [ - { - "name": "LinearSVC", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "penalty", - "type": "Literal['l1', 'l2']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the norm used in the penalization. The 'l2' penalty is the standard used in SVC. The 'l1' leads to ``coef_`` vectors that are sparse." - }, - { - "name": "loss", - "type": "Literal['hinge', 'squared_hinge']", - "hasDefault": true, - "default": "'squared_hinge'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the loss function. 'hinge' is the standard SVM loss (used e.g. by the SVC class) while 'squared_hinge' is the square of the hinge loss. The combination of ``penalty='l1'`` and ``loss='hinge'`` is not supported." - }, - { - "name": "dual", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Select the algorithm to either solve the dual or primal optimization problem. 
Prefer dual=False when n_samples > n_features." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criteria." - }, - { - "name": "C", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive." - }, - { - "name": "multi_class", - "type": "Literal['ovr', 'crammer_singer']", - "hasDefault": true, - "default": "'ovr'", - "limitation": null, - "ignored": false, - "docstring": "Determines the multi-class strategy if `y` contains more than two classes. ``\"ovr\"`` trains n_classes one-vs-rest classifiers, while ``\"crammer_singer\"`` optimizes a joint objective over all classes. While `crammer_singer` is interesting from a theoretical perspective as it is consistent, it is seldom used in practice as it rarely leads to better accuracy and is more expensive to compute. If ``\"crammer_singer\"`` is chosen, the options loss, penalty and dual will be ignored." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be already centered)." - }, - { - "name": "intercept_scaling", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "When self.fit_intercept is True, instance vector x becomes ``[x, self.intercept_scaling]``, i.e. a \"synthetic\" feature with constant value equals to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic feature weight Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. 
To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Set the parameter C of class i to ``class_weight[i]*C`` for SVC. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in liblinear that, if enabled, may not work properly in a multithreaded context." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generation for shuffling the data for the dual coordinate descent (if ``dual=True``). When ``dual=False`` the underlying implementation of :class:`LinearSVC` is not random and ``random_state`` has no effect on the results. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations to be run." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector relative to X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight. .. versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Array of weights that are assigned to individual\n samples. If not provided,\n then each sample is given unit weight.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nself : object\n An instance of the estimator." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Linear Support Vector Classification.\n\nSimilar to SVC with parameter kernel='linear', but implemented in terms of\nliblinear rather than libsvm, so it has more flexibility in the choice of\npenalties and loss functions and should scale better to large numbers of\nsamples.\n\nThis class supports both dense and sparse input and the multiclass support\nis handled according to a one-vs-the-rest scheme.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npenalty : {'l1', 'l2'}, default='l2'\n Specifies the norm used in the penalization. The 'l2'\n penalty is the standard used in SVC. The 'l1' leads to ``coef_``\n vectors that are sparse.\n\nloss : {'hinge', 'squared_hinge'}, default='squared_hinge'\n Specifies the loss function. 'hinge' is the standard SVM loss\n (used e.g. by the SVC class) while 'squared_hinge' is the\n square of the hinge loss. The combination of ``penalty='l1'``\n and ``loss='hinge'`` is not supported.\n\ndual : bool, default=True\n Select the algorithm to either solve the dual or primal\n optimization problem. Prefer dual=False when n_samples > n_features.\n\ntol : float, default=1e-4\n Tolerance for stopping criteria.\n\nC : float, default=1.0\n Regularization parameter. The strength of the regularization is\n inversely proportional to C. 
Must be strictly positive.\n\nmulti_class : {'ovr', 'crammer_singer'}, default='ovr'\n Determines the multi-class strategy if `y` contains more than\n two classes.\n ``\"ovr\"`` trains n_classes one-vs-rest classifiers, while\n ``\"crammer_singer\"`` optimizes a joint objective over all classes.\n While `crammer_singer` is interesting from a theoretical perspective\n as it is consistent, it is seldom used in practice as it rarely leads\n to better accuracy and is more expensive to compute.\n If ``\"crammer_singer\"`` is chosen, the options loss, penalty and dual\n will be ignored.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be already centered).\n\nintercept_scaling : float, default=1\n When self.fit_intercept is True, instance vector x becomes\n ``[x, self.intercept_scaling]``,\n i.e. a \"synthetic\" feature with constant value equals to\n intercept_scaling is appended to the instance vector.\n The intercept becomes intercept_scaling * synthetic feature weight\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\nclass_weight : dict or 'balanced', default=None\n Set the parameter C of class i to ``class_weight[i]*C`` for\n SVC. If not given, all classes are supposed to have\n weight one.\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\nverbose : int, default=0\n Enable verbose output. 
Note that this setting takes advantage of a\n per-process runtime setting in liblinear that, if enabled, may not work\n properly in a multithreaded context.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data for\n the dual coordinate descent (if ``dual=True``). When ``dual=False`` the\n underlying implementation of :class:`LinearSVC` is not random and\n ``random_state`` has no effect on the results.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nmax_iter : int, default=1000\n The maximum number of iterations to be run.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features) if n_classes == 2 else (n_classes, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n ``coef_`` is a readonly property derived from ``raw_coef_`` that\n follows the internal memory layout of liblinear.\n\nintercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n Constants in decision function.\n\nclasses_ : ndarray of shape (n_classes,)\n The unique classes labels.\n\nn_iter_ : int\n Maximum number of iterations run across all classes.\n\nSee Also\n--------\nSVC : Implementation of Support Vector Machine classifier using libsvm:\n the kernel can be non-linear but its SMO algorithm does not\n scale to large number of samples as LinearSVC does.\n\n Furthermore SVC multi-class mode is implemented using one\n vs one scheme while LinearSVC uses one vs the rest. It is\n possible to implement one vs the rest with SVC by using the\n :class:`~sklearn.multiclass.OneVsRestClassifier` wrapper.\n\n Finally SVC can fit dense data without memory copy if the input\n is C-contiguous. 
Sparse data will still incur memory copy though.\n\nsklearn.linear_model.SGDClassifier : SGDClassifier can optimize the same\n cost function as LinearSVC\n by adjusting the penalty and loss parameters. In addition it requires\n less memory, allows incremental (online) learning, and implements\n various loss functions and regularization regimes.\n\nNotes\n-----\nThe underlying C implementation uses a random number generator to\nselect features when fitting the model. It is thus not uncommon\nto have slightly different results for the same input data. If\nthat happens, try with a smaller ``tol`` parameter.\n\nThe underlying implementation, liblinear, uses a sparse internal\nrepresentation for the data that will incur a memory copy.\n\nPredict output may not match that of standalone liblinear in certain\ncases. See :ref:`differences from liblinear `\nin the narrative documentation.\n\nReferences\n----------\n`LIBLINEAR: A Library for Large Linear Classification\n`__\n\nExamples\n--------\n>>> from sklearn.svm import LinearSVC\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_features=4, random_state=0)\n>>> clf = make_pipeline(StandardScaler(),\n... LinearSVC(random_state=0, tol=1e-5))\n>>> clf.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('linearsvc', LinearSVC(random_state=0, tol=1e-05))])\n\n>>> print(clf.named_steps['linearsvc'].coef_)\n[[0.141... 0.526... 0.679... 
0.493...]]\n\n>>> print(clf.named_steps['linearsvc'].intercept_)\n[0.1693...]\n>>> print(clf.predict([[0, 0, 0, 0]]))\n[1]" - }, - { - "name": "LinearSVR", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Epsilon parameter in the epsilon-insensitive loss function. Note that the value of this parameter depends on the scale of the target variable y. If unsure, set ``epsilon=0``." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criteria." - }, - { - "name": "C", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive." - }, - { - "name": "loss", - "type": "Literal['epsilon_insensitive', 'squared_epsilon_insensitive']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies the loss function. The epsilon-insensitive loss (standard SVR) is the L1 loss, while the squared epsilon-insensitive loss ('squared_epsilon_insensitive') is the L2 loss." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be already centered)." - }, - { - "name": "intercept_scaling", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "When self.fit_intercept is True, instance vector x becomes [x, self.intercept_scaling], i.e. 
a \"synthetic\" feature with constant value equals to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic feature weight Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased." - }, - { - "name": "dual", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Select the algorithm to either solve the dual or primal optimization problem. Prefer dual=False when n_samples > n_features." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in liblinear that, if enabled, may not work properly in a multithreaded context." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generation for shuffling the data. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations to be run." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector relative to X" - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight. .. versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X\n\nsample_weight : array-like of shape (n_samples,), default=None\n Array of weights that are assigned to individual\n samples. If not provided,\n then each sample is given unit weight.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nself : object\n An instance of the estimator." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Linear Support Vector Regression.\n\nSimilar to SVR with parameter kernel='linear', but implemented in terms of\nliblinear rather than libsvm, so it has more flexibility in the choice of\npenalties and loss functions and should scale better to large numbers of\nsamples.\n\nThis class supports both dense and sparse input.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16\n\nParameters\n----------\nepsilon : float, default=0.0\n Epsilon parameter in the epsilon-insensitive loss function. Note\n that the value of this parameter depends on the scale of the target\n variable y. 
If unsure, set ``epsilon=0``.\n\ntol : float, default=1e-4\n Tolerance for stopping criteria.\n\nC : float, default=1.0\n Regularization parameter. The strength of the regularization is\n inversely proportional to C. Must be strictly positive.\n\nloss : {'epsilon_insensitive', 'squared_epsilon_insensitive'}, default='epsilon_insensitive'\n Specifies the loss function. The epsilon-insensitive loss\n (standard SVR) is the L1 loss, while the squared epsilon-insensitive\n loss ('squared_epsilon_insensitive') is the L2 loss.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be already centered).\n\nintercept_scaling : float, default=1.\n When self.fit_intercept is True, instance vector x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equals to\n intercept_scaling is appended to the instance vector.\n The intercept becomes intercept_scaling * synthetic feature weight\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\ndual : bool, default=True\n Select the algorithm to either solve the dual or primal\n optimization problem. Prefer dual=False when n_samples > n_features.\n\nverbose : int, default=0\n Enable verbose output. 
Note that this setting takes advantage of a\n per-process runtime setting in liblinear that, if enabled, may not work\n properly in a multithreaded context.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nmax_iter : int, default=1000\n The maximum number of iterations to be run.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features) if n_classes == 2 else (n_classes, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is a readonly property derived from `raw_coef_` that\n follows the internal memory layout of liblinear.\n\nintercept_ : ndarray of shape (1) if n_classes == 2 else (n_classes)\n Constants in decision function.\n\nn_iter_ : int\n Maximum number of iterations run across all classes.\n\nExamples\n--------\n>>> from sklearn.svm import LinearSVR\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=4, random_state=0)\n>>> regr = make_pipeline(StandardScaler(),\n... LinearSVR(random_state=0, tol=1e-5))\n>>> regr.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('linearsvr', LinearSVR(random_state=0, tol=1e-05))])\n\n>>> print(regr.named_steps['linearsvr'].coef_)\n[18.582... 27.023... 44.357... 
64.522...]\n>>> print(regr.named_steps['linearsvr'].intercept_)\n[-4...]\n>>> print(regr.predict([[0, 0, 0, 0]]))\n[-2.384...]\n\n\nSee Also\n--------\nLinearSVC : Implementation of Support Vector Machine classifier using the\n same library as this class (liblinear).\n\nSVR : Implementation of Support Vector Machine regression using libsvm:\n the kernel can be non-linear but its SMO algorithm does not\n scale to large number of samples as LinearSVC does.\n\nsklearn.linear_model.SGDRegressor : SGDRegressor can optimize the same cost\n function as LinearSVR\n by adjusting the penalty and loss parameters. In addition it requires\n less memory, allows incremental (online) learning, and implements\n various loss functions and regularization regimes." - }, - { - "name": "SVC", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "C", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty." - }, - { - "name": "kernel", - "type": "Literal['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the kernel type to be used in the algorithm. It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a callable. If none is given, 'rbf' will be used. If a callable is given it is used to pre-compute the kernel matrix from data matrices; that matrix should be an array of shape ``(n_samples, n_samples)``." - }, - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel function ('poly'). Ignored by all other kernels." 
- }, - { - "name": "gamma", - "type": "Literal['scale', 'auto']", - "hasDefault": true, - "default": "'scale'", - "limitation": null, - "ignored": false, - "docstring": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - if ``gamma='scale'`` (default) is passed then it uses 1 / (n_features * X.var()) as value of gamma, - if 'auto', uses 1 / n_features. .. versionchanged:: 0.22 The default value of ``gamma`` changed from 'auto' to 'scale'." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'." - }, - { - "name": "shrinking", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use the shrinking heuristic. See the :ref:`User Guide `." - }, - { - "name": "probability", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enable probability estimates. This must be enabled prior to calling `fit`, will slow down that method as it internally uses 5-fold cross-validation, and `predict_proba` may be inconsistent with `predict`. Read more in the :ref:`User Guide `." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criterion." - }, - { - "name": "cache_size", - "type": "float", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Specify the size of the kernel cache (in MB)." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Set the parameter C of class i to class_weight[i]*C for SVC. If not given, all classes are supposed to have weight one. 
The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``" - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in libsvm that, if enabled, may not work properly in a multithreaded context." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "-1", - "limitation": null, - "ignored": false, - "docstring": "Hard limit on iterations within solver, or -1 for no limit." - }, - { - "name": "decision_function_shape", - "type": "Literal['ovo', 'ovr']", - "hasDefault": true, - "default": "'ovr'", - "limitation": null, - "ignored": false, - "docstring": "Whether to return a one-vs-rest ('ovr') decision function of shape (n_samples, n_classes) as all other classifiers, or the original one-vs-one ('ovo') decision function of libsvm which has shape (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one ('ovo') is always used as multi-class strategy. The parameter is ignored for binary classification. .. versionchanged:: 0.19 decision_function_shape is 'ovr' by default. .. versionadded:: 0.17 *decision_function_shape='ovr'* is recommended. .. versionchanged:: 0.17 Deprecated *decision_function_shape='ovo' and None*." - }, - { - "name": "break_ties", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, ``decision_function_shape='ovr'``, and number of classes > 2, :term:`predict` will break ties according to the confidence values of :term:`decision_function`; otherwise the first class among the tied classes is returned. Please note that breaking ties comes at a relatively high computational cost compared to a simple predict. .. 
versionadded:: 0.22" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generation for shuffling the data for probability estimates. Ignored when `probability` is False. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "C-Support Vector Classification.\n\nThe implementation is based on libsvm. The fit time scales at least\nquadratically with the number of samples and may be impractical\nbeyond tens of thousands of samples. For large datasets\nconsider using :class:`~sklearn.svm.LinearSVC` or\n:class:`~sklearn.linear_model.SGDClassifier` instead, possibly after a\n:class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\nThe multiclass support is handled according to a one-vs-one scheme.\n\nFor details on the precise mathematical formulation of the provided\nkernel functions and how `gamma`, `coef0` and `degree` affect each\nother, see the corresponding section in the narrative documentation:\n:ref:`svm_kernels`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nC : float, default=1.0\n Regularization parameter. The strength of the regularization is\n inversely proportional to C. Must be strictly positive. The penalty\n is a squared l2 penalty.\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. 
If a callable is given it is\n used to pre-compute the kernel matrix from data matrices; that matrix\n should be an array of shape ``(n_samples, n_samples)``.\n\ndegree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\nshrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\nprobability : bool, default=False\n Whether to enable probability estimates. This must be enabled prior\n to calling `fit`, will slow down that method as it internally uses\n 5-fold cross-validation, and `predict_proba` may be inconsistent with\n `predict`. Read more in the :ref:`User Guide `.\n\ntol : float, default=1e-3\n Tolerance for stopping criterion.\n\ncache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\nclass_weight : dict or 'balanced', default=None\n Set the parameter C of class i to class_weight[i]*C for\n SVC. If not given, all classes are supposed to have\n weight one.\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\nverbose : bool, default=False\n Enable verbose output. 
Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\nmax_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\ndecision_function_shape : {'ovo', 'ovr'}, default='ovr'\n Whether to return a one-vs-rest ('ovr') decision function of shape\n (n_samples, n_classes) as all other classifiers, or the original\n one-vs-one ('ovo') decision function of libsvm which has shape\n (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one\n ('ovo') is always used as multi-class strategy. The parameter is\n ignored for binary classification.\n\n .. versionchanged:: 0.19\n decision_function_shape is 'ovr' by default.\n\n .. versionadded:: 0.17\n *decision_function_shape='ovr'* is recommended.\n\n .. versionchanged:: 0.17\n Deprecated *decision_function_shape='ovo' and None*.\n\nbreak_ties : bool, default=False\n If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n :term:`predict` will break ties according to the confidence values of\n :term:`decision_function`; otherwise the first class among the tied\n classes is returned. Please note that breaking ties comes at a\n relatively high computational cost compared to a simple predict.\n\n .. versionadded:: 0.22\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data for\n probability estimates. 
Ignored when `probability` is False.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C for each class.\n Computed based on the ``class_weight`` parameter.\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\ncoef_ : ndarray of shape (n_classes * (n_classes - 1) / 2, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is a readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\ndual_coef_ : ndarray of shape (n_classes -1, n_SV)\n Dual coefficients of the support vector in the decision\n function (see :ref:`sgd_mathematical_formulation`), multiplied by\n their targets.\n For multiclass, coefficient for all 1-vs-1 classifiers.\n The layout of the coefficients in the multiclass case is somewhat\n non-trivial. See the :ref:`multi-class section of the User Guide\n ` for details.\n\nfit_status_ : int\n 0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n Constants in decision function.\n\nsupport_ : ndarray of shape (n_SV)\n Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\nprobA_ : ndarray of shape (n_classes * (n_classes - 1) / 2)\nprobB_ : ndarray of shape (n_classes * (n_classes - 1) / 2)\n If `probability=True`, it corresponds to the parameters learned in\n Platt scaling to produce probability estimates from decision values.\n If `probability=False`, it's an empty array. Platt scaling uses the\n logistic function\n ``1 / (1 + exp(decision_value * probA_ + probB_))``\n where ``probA_`` and ``probB_`` are learned from the dataset [2]_. 
For\n more information on the multiclass case and training procedure see\n section 8 of [1]_.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> y = np.array([1, 1, 2, 2])\n>>> from sklearn.svm import SVC\n>>> clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))\n>>> clf.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('svc', SVC(gamma='auto'))])\n\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n\nSee Also\n--------\nSVR : Support Vector Machine for Regression implemented using libsvm.\n\nLinearSVC : Scalable Linear Support Vector Machine for classification\n implemented using liblinear. Check the See Also section of\n LinearSVC for more comparison element.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n `_\n\n.. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n machines and comparison to regularizedlikelihood methods.\"\n `_" - }, - { - "name": "NuSVC", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "nu", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "An upper bound on the fraction of margin errors (see :ref:`User Guide `) and a lower bound of the fraction of support vectors. Should be in the interval (0, 1]." - }, - { - "name": "kernel", - "type": "Literal['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the kernel type to be used in the algorithm. It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a callable. 
If none is given, 'rbf' will be used. If a callable is given it is used to precompute the kernel matrix." - }, - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel function ('poly'). Ignored by all other kernels." - }, - { - "name": "gamma", - "type": "Literal['scale', 'auto']", - "hasDefault": true, - "default": "'scale'", - "limitation": null, - "ignored": false, - "docstring": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - if ``gamma='scale'`` (default) is passed then it uses 1 / (n_features * X.var()) as value of gamma, - if 'auto', uses 1 / n_features. .. versionchanged:: 0.22 The default value of ``gamma`` changed from 'auto' to 'scale'." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'." - }, - { - "name": "shrinking", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use the shrinking heuristic. See the :ref:`User Guide `." - }, - { - "name": "probability", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enable probability estimates. This must be enabled prior to calling `fit`, will slow down that method as it internally uses 5-fold cross-validation, and `predict_proba` may be inconsistent with `predict`. Read more in the :ref:`User Guide `." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criterion." 
- }, - { - "name": "cache_size", - "type": "float", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Specify the size of the kernel cache (in MB)." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Set the parameter C of class i to class_weight[i]*C for SVC. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies as ``n_samples / (n_classes * np.bincount(y))``" - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in libsvm that, if enabled, may not work properly in a multithreaded context." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "-1", - "limitation": null, - "ignored": false, - "docstring": "Hard limit on iterations within solver, or -1 for no limit." - }, - { - "name": "decision_function_shape", - "type": "Literal['ovo', 'ovr']", - "hasDefault": true, - "default": "'ovr'", - "limitation": null, - "ignored": false, - "docstring": "Whether to return a one-vs-rest ('ovr') decision function of shape (n_samples, n_classes) as all other classifiers, or the original one-vs-one ('ovo') decision function of libsvm which has shape (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one ('ovo') is always used as multi-class strategy. The parameter is ignored for binary classification. .. versionchanged:: 0.19 decision_function_shape is 'ovr' by default. .. versionadded:: 0.17 *decision_function_shape='ovr'* is recommended. .. versionchanged:: 0.17 Deprecated *decision_function_shape='ovo' and None*." 
- }, - { - "name": "break_ties", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, ``decision_function_shape='ovr'``, and number of classes > 2, :term:`predict` will break ties according to the confidence values of :term:`decision_function`; otherwise the first class among the tied classes is returned. Please note that breaking ties comes at a relatively high computational cost compared to a simple predict. .. versionadded:: 0.22" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generation for shuffling the data for probability estimates. Ignored when `probability` is False. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Nu-Support Vector Classification.\n\nSimilar to SVC but uses a parameter to control the number of support\nvectors.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nnu : float, default=0.5\n An upper bound on the fraction of margin errors (see :ref:`User Guide\n `) and a lower bound of the fraction of support vectors.\n Should be in the interval (0, 1].\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. 
If a callable is given it is\n used to precompute the kernel matrix.\n\ndegree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\nshrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\nprobability : bool, default=False\n Whether to enable probability estimates. This must be enabled prior\n to calling `fit`, will slow down that method as it internally uses\n 5-fold cross-validation, and `predict_proba` may be inconsistent with\n `predict`. Read more in the :ref:`User Guide `.\n\ntol : float, default=1e-3\n Tolerance for stopping criterion.\n\ncache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\nclass_weight : {dict, 'balanced'}, default=None\n Set the parameter C of class i to class_weight[i]*C for\n SVC. If not given, all classes are supposed to have\n weight one. The \"balanced\" mode uses the values of y to automatically\n adjust weights inversely proportional to class frequencies as\n ``n_samples / (n_classes * np.bincount(y))``\n\nverbose : bool, default=False\n Enable verbose output. 
Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\nmax_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\ndecision_function_shape : {'ovo', 'ovr'}, default='ovr'\n Whether to return a one-vs-rest ('ovr') decision function of shape\n (n_samples, n_classes) as all other classifiers, or the original\n one-vs-one ('ovo') decision function of libsvm which has shape\n (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one\n ('ovo') is always used as multi-class strategy. The parameter is\n ignored for binary classification.\n\n .. versionchanged:: 0.19\n decision_function_shape is 'ovr' by default.\n\n .. versionadded:: 0.17\n *decision_function_shape='ovr'* is recommended.\n\n .. versionchanged:: 0.17\n Deprecated *decision_function_shape='ovo' and None*.\n\nbreak_ties : bool, default=False\n If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n :term:`predict` will break ties according to the confidence values of\n :term:`decision_function`; otherwise the first class among the tied\n classes is returned. Please note that breaking ties comes at a\n relatively high computational cost compared to a simple predict.\n\n .. versionadded:: 0.22\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data for\n probability estimates. 
Ignored when `probability` is False.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C of each class.\n Computed based on the ``class_weight`` parameter.\n\nclasses_ : ndarray of shape (n_classes,)\n The unique classes labels.\n\ncoef_ : ndarray of shape (n_classes * (n_classes -1) / 2, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\ndual_coef_ : ndarray of shape (n_classes - 1, n_SV)\n Dual coefficients of the support vector in the decision\n function (see :ref:`sgd_mathematical_formulation`), multiplied by\n their targets.\n For multiclass, coefficient for all 1-vs-1 classifiers.\n The layout of the coefficients in the multiclass case is somewhat\n non-trivial. See the :ref:`multi-class section of the User Guide\n ` for details.\n\nfit_status_ : int\n 0 if correctly fitted, 1 if the algorithm did not converge.\n\nintercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n Constants in decision function.\n\nsupport_ : ndarray of shape (n_SV,)\n Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\nfit_status_ : int\n 0 if correctly fitted, 1 if the algorithm did not converge.\n\nprobA_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\nprobB_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n If `probability=True`, it corresponds to the parameters learned in\n Platt scaling to produce probability estimates from decision values.\n If `probability=False`, it's an empty array. 
Platt scaling uses the\n logistic function\n ``1 / (1 + exp(decision_value * probA_ + probB_))``\n where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For\n more information on the multiclass case and training procedure see\n section 8 of [1]_.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> y = np.array([1, 1, 2, 2])\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.svm import NuSVC\n>>> clf = make_pipeline(StandardScaler(), NuSVC())\n>>> clf.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()), ('nusvc', NuSVC())])\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n\nSee Also\n--------\nSVC : Support Vector Machine for classification using libsvm.\n\nLinearSVC : Scalable linear Support Vector Machine for classification using\n liblinear.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n `_\n\n.. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n machines and comparison to regularizedlikelihood methods.\"\n `_" - }, - { - "name": "SVR", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": "Literal['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the kernel type to be used in the algorithm. It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a callable. If none is given, 'rbf' will be used. If a callable is given it is used to precompute the kernel matrix." 
- }, - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel function ('poly'). Ignored by all other kernels." - }, - { - "name": "gamma", - "type": "Literal['scale', 'auto']", - "hasDefault": true, - "default": "'scale'", - "limitation": null, - "ignored": false, - "docstring": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - if ``gamma='scale'`` (default) is passed then it uses 1 / (n_features * X.var()) as value of gamma, - if 'auto', uses 1 / n_features. .. versionchanged:: 0.22 The default value of ``gamma`` changed from 'auto' to 'scale'." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criterion." - }, - { - "name": "C", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty." - }, - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Epsilon in the epsilon-SVR model. It specifies the epsilon-tube within which no penalty is associated in the training loss function with points predicted within a distance epsilon from the actual value." - }, - { - "name": "shrinking", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use the shrinking heuristic. See the :ref:`User Guide `." 
- }, - { - "name": "cache_size", - "type": "float", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Specify the size of the kernel cache (in MB)." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in libsvm that, if enabled, may not work properly in a multithreaded context." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "-1", - "limitation": null, - "ignored": false, - "docstring": "Hard limit on iterations within solver, or -1 for no limit." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "probA_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "probB_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Epsilon-Support Vector Regression.\n\nThe free parameters in the model are C and epsilon.\n\nThe implementation is based on libsvm. The fit time complexity\nis more than quadratic with the number of samples which makes it hard\nto scale to datasets with more than a couple of 10000 samples. 
For large\ndatasets consider using :class:`~sklearn.svm.LinearSVR` or\n:class:`~sklearn.linear_model.SGDRegressor` instead, possibly after a\n:class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. If a callable is given it is\n used to precompute the kernel matrix.\n\ndegree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\ntol : float, default=1e-3\n Tolerance for stopping criterion.\n\nC : float, default=1.0\n Regularization parameter. The strength of the regularization is\n inversely proportional to C. Must be strictly positive.\n The penalty is a squared l2 penalty.\n\nepsilon : float, default=0.1\n Epsilon in the epsilon-SVR model. It specifies the epsilon-tube\n within which no penalty is associated in the training loss function\n with points predicted within a distance epsilon from the actual\n value.\n\nshrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\ncache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\nverbose : bool, default=False\n Enable verbose output. 
Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\nmax_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C for each class.\n Computed based on the ``class_weight`` parameter.\n\ncoef_ : ndarray of shape (1, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\ndual_coef_ : ndarray of shape (1, n_SV)\n Coefficients of the support vector in the decision function.\n\nfit_status_ : int\n 0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (1,)\n Constants in decision function.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\nsupport_ : ndarray of shape (n_SV,)\n Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\nExamples\n--------\n>>> from sklearn.svm import SVR\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> regr = make_pipeline(StandardScaler(), SVR(C=1.0, epsilon=0.2))\n>>> regr.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('svr', SVR(epsilon=0.2))])\n\nSee Also\n--------\nNuSVR : Support Vector Machine for regression implemented using libsvm\n using a parameter to control the number of support vectors.\n\nLinearSVR : Scalable Linear Support Vector Machine for 
regression\n implemented using liblinear.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n `_\n\n.. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n machines and comparison to regularizedlikelihood methods.\"\n `_" - }, - { - "name": "NuSVR", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "nu", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "An upper bound on the fraction of training errors and a lower bound of the fraction of support vectors. Should be in the interval (0, 1]. By default 0.5 will be taken." - }, - { - "name": "C", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Penalty parameter C of the error term." - }, - { - "name": "kernel", - "type": "Literal['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the kernel type to be used in the algorithm. It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a callable. If none is given, 'rbf' will be used. If a callable is given it is used to precompute the kernel matrix." - }, - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel function ('poly'). Ignored by all other kernels." - }, - { - "name": "gamma", - "type": "Literal['scale', 'auto']", - "hasDefault": true, - "default": "'scale'", - "limitation": null, - "ignored": false, - "docstring": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - if ``gamma='scale'`` (default) is passed then it uses 1 / (n_features * X.var()) as value of gamma, - if 'auto', uses 1 / n_features. .. 
versionchanged:: 0.22 The default value of ``gamma`` changed from 'auto' to 'scale'." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'." - }, - { - "name": "shrinking", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use the shrinking heuristic. See the :ref:`User Guide `." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criterion." - }, - { - "name": "cache_size", - "type": "float", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Specify the size of the kernel cache (in MB)." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in libsvm that, if enabled, may not work properly in a multithreaded context." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "-1", - "limitation": null, - "ignored": false, - "docstring": "Hard limit on iterations within solver, or -1 for no limit." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Nu Support Vector Regression.\n\nSimilar to NuSVC, for regression, uses a parameter nu to control\nthe number of support vectors. 
However, unlike NuSVC, where nu\nreplaces C, here nu replaces the parameter epsilon of epsilon-SVR.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nnu : float, default=0.5\n An upper bound on the fraction of training errors and a lower bound of\n the fraction of support vectors. Should be in the interval (0, 1]. By\n default 0.5 will be taken.\n\nC : float, default=1.0\n Penalty parameter C of the error term.\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. If a callable is given it is\n used to precompute the kernel matrix.\n\ndegree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\nshrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\ntol : float, default=1e-3\n Tolerance for stopping criterion.\n\ncache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\nverbose : bool, default=False\n Enable verbose output. 
Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\nmax_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C for each class.\n Computed based on the ``class_weight`` parameter.\n\ncoef_ : ndarray of shape (1, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\ndual_coef_ : ndarray of shape (1, n_SV)\n Coefficients of the support vector in the decision function.\n\nfit_status_ : int\n 0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (1,)\n Constants in decision function.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\nsupport_ : ndarray of shape (n_SV,)\n Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\nExamples\n--------\n>>> from sklearn.svm import NuSVR\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> np.random.seed(0)\n>>> y = np.random.randn(n_samples)\n>>> X = np.random.randn(n_samples, n_features)\n>>> regr = make_pipeline(StandardScaler(), NuSVR(C=1.0, nu=0.1))\n>>> regr.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('nusvr', NuSVR(nu=0.1))])\n\nSee Also\n--------\nNuSVC : Support Vector Machine for classification implemented with libsvm\n with a parameter to control the number of support vectors.\n\nSVR : Epsilon Support Vector Machine for regression 
implemented with\n libsvm.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n `_\n\n.. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n machines and comparison to regularizedlikelihood methods.\"\n `_" - }, - { - "name": "OneClassSVM", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": "Literal['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the kernel type to be used in the algorithm. It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a callable. If none is given, 'rbf' will be used. If a callable is given it is used to precompute the kernel matrix." - }, - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel function ('poly'). Ignored by all other kernels." - }, - { - "name": "gamma", - "type": "Literal['scale', 'auto']", - "hasDefault": true, - "default": "'scale'", - "limitation": null, - "ignored": false, - "docstring": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - if ``gamma='scale'`` (default) is passed then it uses 1 / (n_features * X.var()) as value of gamma, - if 'auto', uses 1 / n_features. .. versionchanged:: 0.22 The default value of ``gamma`` changed from 'auto' to 'scale'." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criterion." 
- }, - { - "name": "nu", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "An upper bound on the fraction of training errors and a lower bound of the fraction of support vectors. Should be in the interval (0, 1]. By default 0.5 will be taken." - }, - { - "name": "shrinking", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use the shrinking heuristic. See the :ref:`User Guide `." - }, - { - "name": "cache_size", - "type": "float", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Specify the size of the kernel cache (in MB)." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in libsvm that, if enabled, may not work properly in a multithreaded context." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "-1", - "limitation": null, - "ignored": false, - "docstring": "Hard limit on iterations within solver, or -1 for no limit." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Set of samples, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Per-sample weights. Rescale C per sample. Higher weights force the classifier to put more emphasis on these points." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "not used, present for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Detects the soft boundary of the set of samples X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Set of samples, where n_samples is the number of samples and\n n_features is the number of features.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Per-sample weights. Rescale C per sample. Higher weights\n force the classifier to put more emphasis on these points.\n\ny : Ignored\n not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n\nNotes\n-----\nIf X is not a C-ordered contiguous array it is copied." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Signed distance to the separating hyperplane.\n\nSigned distance is positive for an inlier and negative for an outlier.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\ndec : ndarray of shape (n_samples,)\n Returns the decision function of the samples." - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Raw scoring function of the samples.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\nscore_samples : ndarray of shape (n_samples,)\n Returns the (unshifted) scoring function of the samples." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "For kernel=\"precomputed\", the expected shape of X is (n_samples_test, n_samples_train)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on samples in X.\n\nFor a one-class model, +1 or -1 is returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples_test, n_samples_train)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n Class labels for samples in X." 
- }, - { - "name": "probA_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "probB_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Unsupervised Outlier Detection.\n\nEstimate the support of a high-dimensional distribution.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. If a callable is given it is\n used to precompute the kernel matrix.\n\ndegree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\ntol : float, default=1e-3\n Tolerance for stopping criterion.\n\nnu : float, default=0.5\n An upper bound on the fraction of training\n errors and a lower bound of the fraction of support\n vectors. Should be in the interval (0, 1]. 
By default 0.5\n will be taken.\n\nshrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\ncache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\nverbose : bool, default=False\n Enable verbose output. Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\nmax_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C for each class.\n Computed based on the ``class_weight`` parameter.\n\ncoef_ : ndarray of shape (1, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\ndual_coef_ : ndarray of shape (1, n_SV)\n Coefficients of the support vectors in the decision function.\n\nfit_status_ : int\n 0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (1,)\n Constant in the decision function.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\noffset_ : float\n Offset used to define the decision function from the raw scores.\n We have the relation: decision_function = score_samples - `offset_`.\n The offset is the opposite of `intercept_` and is provided for\n consistency with other outlier detection algorithms.\n\n .. 
versionadded:: 0.20\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\nsupport_ : ndarray of shape (n_SV,)\n Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\nExamples\n--------\n>>> from sklearn.svm import OneClassSVM\n>>> X = [[0], [0.44], [0.45], [0.46], [1]]\n>>> clf = OneClassSVM(gamma='auto').fit(X)\n>>> clf.predict(X)\narray([-1, 1, 1, 1, -1])\n>>> clf.score_samples(X)\narray([1.7798..., 2.0547..., 2.0556..., 2.0561..., 1.7332...])" - } - ], - "functions": [] - }, - { - "name": "sklearn.svm", - "imports": [ - "from _classes import SVC", - "from _classes import NuSVC", - "from _classes import SVR", - "from _classes import NuSVR", - "from _classes import OneClassSVM", - "from _classes import LinearSVC", - "from _classes import LinearSVR", - "from _bounds import l1_min_c" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.svm.tests.test_bounds", - "imports": [ - "import numpy as np", - "from scipy import sparse as sp", - "from scipy import stats", - "import pytest", - "from sklearn.svm._bounds import l1_min_c", - "from sklearn.svm import LinearSVC", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.svm._newrand import set_seed_wrap", - "from sklearn.svm._newrand import bounded_rand_int_wrap", - "from sklearn.utils._testing import assert_raise_message" - ], - "classes": [], - "functions": [ - { - "name": "test_l1_min_c", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_l1_min_c_l2_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_l1_min_c", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ill_posed_min_c", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unsupported_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_newrand_set_seed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that `set_seed` produces deterministic results" - }, - { - "name": "test_newrand_set_seed_overflow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that `set_seed_wrap` is defined for unsigned 32bits ints" - }, - { - "name": "test_newrand_bounded_rand_int", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that `bounded_rand_int` follows a uniform distribution" - }, - { - "name": "test_newrand_bounded_rand_int_limits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that `bounded_rand_int_wrap` is defined for unsigned 32bits ints" - } - ] - }, - { - "name": "sklearn.svm.tests.test_sparse", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal", - "from scipy import sparse", - "from sklearn import datasets", - "from sklearn import svm", - "from sklearn import linear_model", - "from sklearn import base", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import load_digits", - "from sklearn.datasets import make_blobs", - "from sklearn.svm.tests import test_svm", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.utils.extmath import safe_sparse_dot", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import 
skip_if_32bit" - ], - "classes": [], - "functions": [ - { - "name": "check_svm_model_equal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that sparse SVC gives the same result as SVC" - }, - { - "name": "test_unsorted_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svc_with_custom_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svc_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvc_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_liblinear_intercept_handling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_oneclasssvm", - "decorators": [], - "parameters": [], 
- "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_realdata", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_svc_clone_with_callable_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_timeout", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_consistent_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.svm.tests.test_svm", - "imports": [ - "import numpy as np", - "import itertools", - "import pytest", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_almost_equal", - "from numpy.testing import assert_allclose", - "from scipy import sparse", - "from sklearn import svm", - "from sklearn import linear_model", - "from sklearn import datasets", - "from sklearn import metrics", - "from sklearn import base", - "from sklearn.svm import LinearSVC", - "from sklearn.svm import LinearSVR", - "from sklearn.model_selection import train_test_split", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_blobs", - "from sklearn.metrics import f1_score", - "from sklearn.metrics.pairwise import rbf_kernel", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils.validation import _num_samples", - "from sklearn.utils import shuffle", - "from sklearn.exceptions import ConvergenceWarning", - "from 
sklearn.exceptions import NotFittedError", - "from sklearn.exceptions import UndefinedMetricWarning", - "from sklearn.multiclass import OneVsRestClassifier", - "from sklearn.svm import _libsvm", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.utils import compute_class_weight", - "import os" - ], - "classes": [], - "functions": [ - { - "name": "test_libsvm_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_libsvm_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvr_fit_sampleweight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svr_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oneclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oneclass_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oneclass_score_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tweak_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_probability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decision_function_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svr_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svm_classifier_sided_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svm_regressor_sided_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svm_equivalence_sample_weight_C", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_negative_sample_weights_mask_all_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_negative_weights_svc_leave_just_one_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_negative_weights_svc_leave_two_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_negative_weight_equal_coeffs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_auto_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bad_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svm_gamma_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unicode_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_fit_support_vectors_empty", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvc_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_svx_uppercase_loss_penality_raises_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvc_crammer_singer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvc_fit_sampleweight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_crammer_singer_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvc_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "test_dense_liblinear_intercept_handling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_liblinear_set_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_immutable_coef_property", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvc_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svc_clone_with_callable_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svc_bad_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_timeout", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unfitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_consistent_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_svm_convergence_warnings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svr_coef_sign", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_svc_intercept_scaling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lsvc_intercept_scaling_zero", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hasattr_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decision_function_shape_two_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svc_invalid_break_ties_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svc_ovr_tie_breaking", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test if predict breaks ties in OVR mode.\nRelated issue: https://github.com/scikit-learn/scikit-learn/issues/8277" - }, - { - "name": "test_gamma_auto", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gamma_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvm_liblinear_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_support_oneclass_svr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svm_probA_proB_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_custom_kernel_not_array_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test using a custom kernel that is not fed 
with array-like for floats" - } - ] - }, - { - "name": "sklearn.svm.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.tests.test_base", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "import pytest", - "import sklearn", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.base import BaseEstimator", - "from sklearn.base import clone", - "from sklearn.base import is_classifier", - "from sklearn.base import _is_pairwise", - "from sklearn.svm import SVC", - "from sklearn.pipeline import Pipeline", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.decomposition import KernelPCA", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn import datasets", - "from sklearn.base import TransformerMixin", - "from sklearn.utils._mocking import MockDataFrame", - "from sklearn import config_context", - "import pickle", - "from sklearn.feature_selection import SelectFpr", - "from sklearn.feature_selection import f_classif" - ], - "classes": [ - { - "name": "MyEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "K", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "T", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NaNTag", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoNaNTag", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "OverrideTag", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "DiamondOverwriteTag", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "InheritDiamondOverwriteTag", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - }, - { - "name": "ModifyInitParams", - "decorators": 
[], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Deprecated behavior.\nEqual parameters but with a type cast.\nDoesn't fulfill a is a" - }, - { - "name": "Buggy", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A buggy estimator that does not set its parameters right. " - }, - { - "name": "NoEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "VargEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "scikit-learn estimators shouldn't have vargs." 
- }, - { - "name": "TreeBadVersion", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__getstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "TreeNoVersion", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__getstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "DontPickleAttributeMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__getstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__setstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "MultiInheritanceEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "SingleInheritanceEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__getstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_clone", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone_2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone_buggy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone_empty_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone_sparse_matrices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone_estimator_types", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone_class_rather_than_instance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_repr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_str", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_is_classifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_set_params_passes_all_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_params_updates_valid_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone_pandas_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle_version_warning_is_not_raised_with_matching_version", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle_version_warning_is_issued_upon_different_version", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle_version_warning_is_issued_when_no_version_info_in_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle_version_no_warning_is_issued_with_non_sklearn_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickling_when_getstate_is_overwritten_by_mixin", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickling_when_getstate_is_overwritten_by_mixin_outside_of_sklearn", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickling_works_when_getstate_is_overwritten_in_the_child_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_tag_inheritance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises_on_get_params_non_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_repr_mimebundle_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_repr_html_wraps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_is_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_build", - "imports": [ - "import os", - "import pytest", - "import textwrap", - "from sklearn import __version__", - "from sklearn.utils._openmp_helpers import _openmp_parallelism_enabled" - ], - "classes": [], - "functions": [ - { - "name": "test_openmp_parallelism_enabled", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_calibration", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_allclose", - "from scipy import sparse", - "from sklearn.base import BaseEstimator", - "from sklearn.model_selection import LeaveOneOut", - "from sklearn.model_selection import train_test_split", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils.extmath import softmax", - "from sklearn.exceptions import NotFittedError", - 
"from sklearn.datasets import make_classification", - "from sklearn.datasets import make_blobs", - "from sklearn.preprocessing import LabelEncoder", - "from sklearn.model_selection import KFold", - "from sklearn.model_selection import cross_val_predict", - "from sklearn.naive_bayes import MultinomialNB", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.ensemble import RandomForestRegressor", - "from sklearn.svm import LinearSVC", - "from sklearn.isotonic import IsotonicRegression", - "from sklearn.feature_extraction import DictVectorizer", - "from sklearn.pipeline import Pipeline", - "from sklearn.impute import SimpleImputer", - "from sklearn.metrics import brier_score_loss", - "from sklearn.calibration import CalibratedClassifierCV", - "from sklearn.calibration import _sigmoid_calibration", - "from sklearn.calibration import _SigmoidCalibration", - "from sklearn.calibration import calibration_curve" - ], - "classes": [], - "functions": [ - { - "name": "data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_bad_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_default_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_cv_splitter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parallel_execution", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test parallel calibration" - }, - { - "name": "test_calibration_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_prefit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test calibration for prefitted classifiers" - }, - { - "name": "test_calibration_ensemble_false", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sigmoid_calibration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test calibration values with Platt sigmoid model" - }, - { - "name": "test_calibration_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check calibration_curve function" - }, - { - "name": "test_calibration_nan_imputer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that calibration can accept nan" - }, - { - "name": "test_calibration_prob_sum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_less_classes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_accepts_ndarray", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that calibration accepts n-dimensional arrays as input" - }, - { - "name": "text_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "text_data_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibrated_classifier_cv_deprecation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_check_build", - "imports": [ - "from sklearn.__check_build import raise_build_error", - "from sklearn.utils._testing import assert_raises" - ], - "classes": [], - "functions": [ - { - "name": "test_raise_build_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_common", - "imports": [ - "import os", - "import warnings", - "import sys", - "import re", - "import pkgutil", - "from inspect import isgenerator", - "from functools import partial", - "import pytest", - "from sklearn.utils import all_estimators", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.exceptions import FitFailedWarning", - "from sklearn.utils.estimator_checks import check_estimator", - "import sklearn", - "from sklearn.base import BiclusterMixin", - "from sklearn.decomposition import PCA", - "from sklearn.linear_model._base import LinearClassifierMixin", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import Ridge", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.model_selection import RandomizedSearchCV", - "from sklearn.pipeline import make_pipeline", - "from sklearn.utils 
import IS_PYPY", - "from sklearn.utils._testing import SkipTest", - "from sklearn.utils.estimator_checks import _construct_instance", - "from sklearn.utils.estimator_checks import _set_checking_parameters", - "from sklearn.utils.estimator_checks import _get_check_estimator_ids", - "from sklearn.utils.estimator_checks import check_class_weight_balanced_linear_classifier", - "from sklearn.utils.estimator_checks import parametrize_with_checks", - "from sklearn.utils.estimator_checks import check_n_features_in_after_fitting" - ], - "classes": [], - "functions": [ - { - "name": "test_all_estimator_no_base_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sample_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_check_estimator_ids", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_tested_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_estimator_generate_only", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_configure", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_tested_linear_classifiers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_weight_balanced_linear_classifiers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_import_all_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_root_import_all_completeness", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_all_tests_are_importable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_support_removed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_generate_search_cv_instances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_n_features_in_after_fitting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_config", - "imports": [ - "from sklearn import get_config", - "from sklearn import set_config", - "from sklearn import config_context", - "from sklearn.utils._testing import assert_raises" - ], - "classes": [], - "functions": [ - { - "name": "test_config_context", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_config_context_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_config", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_discriminant_analysis", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy import linalg", - "from 
sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.datasets import make_blobs", - "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis", - "from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis", - "from sklearn.discriminant_analysis import _cov", - "from sklearn.covariance import ledoit_wolf", - "from sklearn.cluster import KMeans", - "from sklearn.covariance import ShrunkCovariance", - "from sklearn.covariance import LedoitWolf", - "from sklearn.preprocessing import StandardScaler" - ], - "classes": [], - "functions": [ - { - "name": "test_lda_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_priors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_coefs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_explained_variance_ratio", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null 
- }, - { - "name": "test_lda_orthogonality", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_scaling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_store_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_shrinkage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_ledoitwolf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_dimension_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_dtype_match", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_numeric_consistency_float32_float64", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_qda", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_qda_priors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_qda_store_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_qda_regularization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - 
{ - "name": "test_raises_value_error_on_same_number_of_classes_and_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Tests that if the number of samples equals the number\nof classes, a ValueError is raised." - } - ] - }, - { - "name": "sklearn.tests.test_docstring_parameters", - "imports": [ - "import inspect", - "import warnings", - "import importlib", - "from pkgutil import walk_packages", - "from inspect import signature", - "import numpy as np", - "import sklearn", - "from sklearn.utils import IS_PYPY", - "from sklearn.utils._testing import check_docstring_parameters", - "from sklearn.utils._testing import _get_func_name", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils import all_estimators", - "from sklearn.utils.estimator_checks import _enforce_estimator_tags_y", - "from sklearn.utils.estimator_checks import _enforce_estimator_tags_x", - "from sklearn.utils.estimator_checks import _construct_instance", - "from sklearn.utils.deprecation import _is_deprecated", - "from sklearn.externals._pep562 import Pep562", - "from sklearn.datasets import make_classification", - "from sklearn.linear_model import LogisticRegression", - "import pytest", - "from numpydoc import docscrape" - ], - "classes": [], - "functions": [ - { - "name": "test_docstring_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tabs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_construct_searchcv_instance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_docstring_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_dummy", 
- "imports": [ - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "from sklearn.base import clone", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils.stats import _weighted_percentile", - "from sklearn.dummy import DummyClassifier", - "from sklearn.dummy import DummyRegressor", - "from sklearn.exceptions import NotFittedError" - ], - "classes": [], - "functions": [ - { - "name": "_check_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_behavior_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_behavior_2d_for_constant", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_equality_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_most_frequent_and_prior_strategy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_most_frequent_and_prior_strategy_with_2d_column_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_most_frequent_and_prior_strategy_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_strategy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_strategy_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_uniform_strategy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_uniform_strategy_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_string_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_score_with_None", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_prediction_independent_of_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_strategy_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_strategy_multioutput_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_median_strategy_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_median_strategy_multioutput_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - 
}, - { - "name": "test_quantile_strategy_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_strategy_multioutput_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_strategy_empty_train", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_constant_strategy_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_constant_strategy_multioutput_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_y_mean_attribute_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unknown_strategey_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_constants_not_specified_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_constant_size_multioutput_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_constant_strategy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_constant_strategy_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "test_constant_strategy_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_constant_strategy_sparse_target", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_uniform_strategy_sparse_target_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_strategy_sparse_target", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_most_frequent_and_prior_strategy_sparse_target", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dummy_regressor_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dummy_regressor_on_3D_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dummy_classifier_on_3D_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dummy_regressor_return_std", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_score_with_None", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_prediction_independent_of_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_dtype_of_classifier_probas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_init", - "imports": [ - "from sklearn import *" - ], - "classes": [], - "functions": [ - { - "name": "test_import_skl", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_isotonic", - "imports": [ - "import warnings", - "import numpy as np", - "import pickle", - "import copy", - "import pytest", - "from sklearn.isotonic import check_increasing", - "from sklearn.isotonic import isotonic_regression", - "from sklearn.isotonic import IsotonicRegression", - "from sklearn.isotonic import _make_unique", - "from sklearn.utils.validation import check_array", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils import shuffle", - "from scipy.special import expit" - ], - "classes": [], - "functions": [ - { - "name": "test_permutation_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_increasing_small_number_of_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_increasing_up", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - 
}, - { - "name": "test_check_increasing_up_extreme", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_increasing_down", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_increasing_down_extreme", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_ci_warn", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_ties_min", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_ties_max", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_ties_secondary_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test isotonic regression fit, transform and fit_transform\nagainst the \"secondary\" ties method and \"pituitary\" data from R\n \"isotone\" package, as detailed in: J. d. Leeuw, K. Hornik, P. 
Mair,\n Isotone Optimization in R: Pool-Adjacent-Violators Algorithm\n(PAVA) and Active Set Methods\n\nSet values based on pituitary example and\n the following R command detailed in the paper above:\n> library(\"isotone\")\n> data(\"pituitary\")\n> res1 <- gpava(pituitary$age, pituitary$size, ties=\"secondary\")\n> res1$x\n\n`isotone` version: 1.0-2, 2014-09-07\nR version: R version 3.1.1 (2014-07-10)" - }, - { - "name": "test_isotonic_regression_with_ties_in_differently_sized_groups", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Non-regression test to handle issue 9432:\nhttps://github.com/scikit-learn/scikit-learn/issues/9432\n\nCompare against output in R:\n> library(\"isotone\")\n> x <- c(0, 1, 1, 2, 3, 4)\n> y <- c(0, 0, 1, 0, 0, 1)\n> res1 <- gpava(x, y, ties=\"secondary\")\n> res1$x\n\n`isotone` version: 1.1-0, 2015-07-24\nR version: R version 3.3.2 (2016-10-31)" - }, - { - "name": "test_isotonic_regression_reversed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_auto_decreasing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_auto_increasing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_assert_raises_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_sample_weight_parameter_default_value", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_min_max_boundaries", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_isotonic_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_oob_raise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_oob_clip", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_oob_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_oob_bad", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_oob_bad_after", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_duplicate_min_entry", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_ymin_ymax", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_zero_weight_loop", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fast_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_copy_before_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_dtype", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_mismatched_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_unique_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_unique_tolerance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_make_unique_tolerance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_non_regression_inf_slope", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_thresholds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_shape_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_2darray_more_than_1_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_kernel_approximation", - "imports": [ - "import numpy as np", - "from scipy.sparse import csr_matrix", - "import pytest", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.metrics.pairwise import kernel_metrics", - "from sklearn.kernel_approximation import RBFSampler", - "from sklearn.kernel_approximation import AdditiveChi2Sampler", - "from sklearn.kernel_approximation import SkewedChi2Sampler", - "from 
sklearn.kernel_approximation import Nystroem", - "from sklearn.kernel_approximation import PolynomialCountSketch", - "from sklearn.metrics.pairwise import polynomial_kernel", - "from sklearn.metrics.pairwise import rbf_kernel", - "from sklearn.metrics.pairwise import chi2_kernel" - ], - "classes": [], - "functions": [ - { - "name": "test_polynomial_count_sketch_raises_if_degree_lower_than_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_count_sketch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_linear_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_additive_chi2_sampler", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_skewed_chi2_sampler", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_additive_chi2_sampler_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ensures correct error message" - }, - { - "name": "test_rbf_sampler", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nystroem_approximation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nystroem_default_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nystroem_singular_kernel", 
- "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nystroem_poly_kernel_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nystroem_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nystroem_precomputed_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_kernel_ridge", - "imports": [ - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "from sklearn.datasets import make_regression", - "from sklearn.linear_model import Ridge", - "from sklearn.kernel_ridge import KernelRidge", - "from sklearn.metrics.pairwise import pairwise_kernels", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_array_almost_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_kernel_ridge", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_ridge_csr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_ridge_csc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_ridge_singular_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_ridge_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_ridge_precomputed_kernel_unchanged", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_ridge_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_ridge_multi_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_ridge_pairwise_is_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_metaestimators", - "imports": [ - "import functools", - "import numpy as np", - "from sklearn.base import BaseEstimator", - "from sklearn.datasets import make_classification", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils.validation import check_is_fitted", - "from sklearn.pipeline import Pipeline", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.model_selection import RandomizedSearchCV", - "from sklearn.feature_selection import RFE", - "from sklearn.feature_selection import RFECV", - "from sklearn.ensemble import BaggingClassifier", - "from sklearn.exceptions import NotFittedError", - "from sklearn.semi_supervised import SelfTrainingClassifier" - ], - "classes": [ - { - "name": "DelegatorData", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_metaestimator_delegation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_min_dependencies_readme", - "imports": [ - "import os", - "import re", - "from pathlib import Path", - "import pytest", - "import sklearn", - "from sklearn._min_dependencies 
import dependent_packages", - "from sklearn.utils.fixes import parse_version" - ], - "classes": [], - "functions": [ - { - "name": "test_min_dependencies_readme", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_multiclass", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "import pytest", - "from re import escape", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_raises_regexp", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._mocking import CheckingClassifier", - "from sklearn.multiclass import OneVsRestClassifier", - "from sklearn.multiclass import OneVsOneClassifier", - "from sklearn.multiclass import OutputCodeClassifier", - "from sklearn.utils.multiclass import check_classification_targets", - "from sklearn.utils.multiclass import type_of_target", - "from sklearn.utils import check_array", - "from sklearn.utils import shuffle", - "from sklearn.metrics import precision_score", - "from sklearn.metrics import recall_score", - "from sklearn.svm import LinearSVC", - "from sklearn.svm import SVC", - "from sklearn.naive_bayes import MultinomialNB", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import Lasso", - "from sklearn.linear_model import ElasticNet", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model import Perceptron", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import SGDClassifier", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.model_selection 
import GridSearchCV", - "from sklearn.model_selection import cross_val_score", - "from sklearn.pipeline import Pipeline", - "from sklearn.pipeline import make_pipeline", - "from sklearn.impute import SimpleImputer", - "from sklearn import svm", - "from sklearn import datasets" - ], - "classes": [], - "functions": [ - { - "name": "test_ovr_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_classification_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_partial_fit_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_ovo_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_fit_predict_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_always_present", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_ovr_fit_predict_svc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_multilabel_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_multilabel_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_single_label_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_multilabel_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_single_label_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_coef_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_deprecated_coef_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_fit_on_list", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_partial_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_ties", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_ties2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_string_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_one_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_float_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ecoc_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ecoc_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ecoc_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ecoc_float_y", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ecoc_delegate_sparse_base_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_tag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_cross_val_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_support_missing_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_multioutput", - "imports": [ - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "from joblib import cpu_count", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn import datasets", - "from sklearn.base import clone", - "from sklearn.datasets import make_classification", - "from sklearn.ensemble import GradientBoostingRegressor", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.exceptions import 
NotFittedError", - "from sklearn.linear_model import Lasso", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import OrthogonalMatchingPursuit", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model import SGDClassifier", - "from sklearn.linear_model import SGDRegressor", - "from sklearn.metrics import jaccard_score", - "from sklearn.metrics import mean_squared_error", - "from sklearn.multiclass import OneVsRestClassifier", - "from sklearn.multioutput import ClassifierChain", - "from sklearn.multioutput import RegressorChain", - "from sklearn.multioutput import MultiOutputClassifier", - "from sklearn.multioutput import MultiOutputRegressor", - "from sklearn.svm import LinearSVC", - "from sklearn.base import ClassifierMixin", - "from sklearn.utils import shuffle", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.dummy import DummyRegressor", - "from sklearn.dummy import DummyClassifier", - "from sklearn.pipeline import make_pipeline", - "from sklearn.impute import SimpleImputer" - ], - "classes": [ - { - "name": "DummyRegressorWithFitParams", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "DummyClassifierWithFitParams", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_multi_target_regression", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_target_regression_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_target_regression_one_target", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_target_sparse_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_target_sample_weights_api", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_target_sample_weight_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_target_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_output_classification_partial_fit_parallelism", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hasattr_multi_output_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_output_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_output_classification_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_output_classification_partial_fit_no_first_classes_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_multi_output_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_multioutput_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_multioutput_estimator_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_output_classification_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_output_classification_partial_fit_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_output_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "generate_multilabel_dataset_with_correlations", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_chain_fit_and_predict_with_linear_svc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_chain_fit_and_predict_with_sparse_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_chain_vs_independent_models", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_chain_fit_and_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_base_chain_fit_and_predict_with_sparse_data_and_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_chain_random_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_chain_crossval_fit_and_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_output_classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multioutput_estimator_with_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_chain_w_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_support_missing_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_chain_tuple_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_chain_tuple_invalid_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_naive_bayes", - "imports": [ - "import pickle", - "from io import BytesIO", - "import numpy as np", - "import scipy.sparse", - "import pytest", - "from sklearn.datasets import load_digits", - "from sklearn.datasets import load_iris", - "from sklearn.model_selection import train_test_split", - "from sklearn.model_selection import cross_val_score", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import 
assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.naive_bayes import GaussianNB", - "from sklearn.naive_bayes import BernoulliNB", - "from sklearn.naive_bayes import MultinomialNB", - "from sklearn.naive_bayes import ComplementNB", - "from sklearn.naive_bayes import CategoricalNB" - ], - "classes": [], - "functions": [ - { - "name": "test_gnb", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gnb_prior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gnb_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test whether sample weights are properly used in GNB. 
" - }, - { - "name": "test_gnb_neg_priors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test whether an error is raised in case of negative priors" - }, - { - "name": "test_gnb_priors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test whether the class prior override is properly used" - }, - { - "name": "test_gnb_priors_sum_isclose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gnb_wrong_nb_priors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test whether an error is raised if the number of prior is different\nfrom the number of class" - }, - { - "name": "test_gnb_prior_greater_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test if an error is raised if the sum of prior greater than one" - }, - { - "name": "test_gnb_prior_large_bias", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test if good prediction when class prior favor largely one class" - }, - { - "name": "test_gnb_check_update_with_no_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test when the partial fit is called without any data" - }, - { - "name": "test_gnb_pfit_wrong_nb_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test whether an error is raised when the number of feature changes\nbetween two partial fit" - }, - { - "name": "test_gnb_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gnb_naive_bayes_scale_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_deprecated_coef_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_prior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_input_check_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_input_check_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_uniform_prior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_provide_prior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_provide_prior_with_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_sample_weight_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_coef_intercept_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_mnnb", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mnb_prior_unobserved_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mnb_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bnb", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bnb_feature_log_prob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cnb", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categoricalnb", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categoricalnb_with_min_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categoricalnb_min_categories_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_alpha", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_alpha_vector", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_accuracy_on_digits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_pipeline", - "imports": [ - "from tempfile import mkdtemp", - "import shutil", - "import 
time", - "import re", - "import itertools", - "import pytest", - "import numpy as np", - "from scipy import sparse", - "import joblib", - "from sklearn.utils.fixes import parse_version", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import MinimalClassifier", - "from sklearn.utils._testing import MinimalRegressor", - "from sklearn.utils._testing import MinimalTransformer", - "from sklearn.base import clone", - "from sklearn.base import is_classifier", - "from sklearn.base import BaseEstimator", - "from sklearn.base import TransformerMixin", - "from sklearn.pipeline import Pipeline", - "from sklearn.pipeline import FeatureUnion", - "from sklearn.pipeline import make_pipeline", - "from sklearn.pipeline import make_union", - "from sklearn.svm import SVC", - "from sklearn.neighbors import LocalOutlierFactor", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import Lasso", - "from sklearn.linear_model import LinearRegression", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import r2_score", - "from sklearn.cluster import KMeans", - "from sklearn.feature_selection import SelectKBest", - "from sklearn.feature_selection import f_classif", - "from sklearn.dummy import DummyRegressor", - "from sklearn.decomposition import PCA", - "from sklearn.decomposition import TruncatedSVD", - "from sklearn.datasets import load_iris", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.feature_extraction.text import CountVectorizer", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from 
sklearn.ensemble import HistGradientBoostingClassifier", - "from sklearn.impute import SimpleImputer" - ], - "classes": [ - { - "name": "NoFit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Small class to test parameter dispatching.\n " - }, - { - "name": "NoTrans", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoInvTransf", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "Transf", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - 
"docstring": null - }, - { - "name": "TransfFitParams", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "Mult", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "FitParamT", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null 
- }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mock classifier\n " - }, - { - "name": "DummyTransf", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transformer which store the column means" - }, - { - "name": "DummyEstimatorParams", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mock classifier that takes params on predict" - }, - { - "name": "DummyMemory", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "cache", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "WrongDummyMemory", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_pipeline_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_pipeline_init_tuple", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_methods_anova", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_sample_weight_supported", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_sample_weight_unsupported", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_raise_set_params_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_methods_pca_svm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_score_samples_pca_lof", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_samples_on_pipeline_without_score_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_methods_preprocessing_svm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_predict_on_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_predict_on_pipeline_without_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_fit_predict_with_intermediate_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_with_predict_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_union", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_union", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_union_kwargs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_slice", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_index", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_pipeline_steps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_named_steps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_correctly_adjusts_steps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_pipeline_step_passthrough", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_ducktyping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_union_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_union_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_union_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classes_property", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_feature_union_steps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_feature_union_step_drop", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_step_name_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_params_nested_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_wrong_memory", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_with_cache_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_pipeline_memory", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_pipeline_memory", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_param_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in_feature_union", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_union_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_missing_values_leniency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_union_warns_unknown_transformer_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_get_tags_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_cv_using_minimal_compatible_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_random_projection", - "imports": [ - "import functools", - "from typing import List", - "from typing import Any", - "import numpy as np", - "import scipy.sparse as sp", - "import 
pytest", - "from sklearn.metrics import euclidean_distances", - "from sklearn.random_projection import johnson_lindenstrauss_min_dim", - "from sklearn.random_projection import _gaussian_random_matrix", - "from sklearn.random_projection import _sparse_random_matrix", - "from sklearn.random_projection import SparseRandomProjection", - "from sklearn.random_projection import GaussianRandomProjection", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.exceptions import DataDimensionalityWarning" - ], - "classes": [], - "functions": [ - { - "name": "make_sparse_random_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "densify", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_jl_domain", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_size_jl_min_dim", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_input_size_random_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_size_generated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_zero_mean_and_unit_norm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_input_with_sparse_random_matrix", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_basic_property_of_random_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_basic_property_of_sparse_random_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_random_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_random_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_random_projection_transformer_invalid_density", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_projection_transformer_invalid_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_try_to_transform_before_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_too_many_samples_to_find_a_safe_embedding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_projection_embedding_quality", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_SparseRandomProjection_output_representation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_correct_RandomProjection_dimensions_embedding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_warning_n_components_greater_than_n_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_works_with_sparse_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.tree.setup", - "imports": [ - "import os", - "import numpy", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tree._classes", - "imports": [ - "import numbers", - "import warnings", - "from abc import ABCMeta", - "from abc import abstractmethod", - "from math import ceil", - "import numpy as np", - "from scipy.sparse import issparse", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from base import clone", - "from base import RegressorMixin", - "from base import is_classifier", - "from base import MultiOutputMixin", - "from utils import Bunch", - "from utils import check_random_state", - "from utils.validation import _check_sample_weight", - "from utils import compute_sample_weight", - "from utils.multiclass import check_classification_targets", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from _criterion import Criterion", - "from _splitter import Splitter", - "from _tree import DepthFirstTreeBuilder", - "from _tree import BestFirstTreeBuilder", - "from _tree import Tree", - "from _tree import _build_pruned_tree_ccp", - "from _tree import ccp_pruning_path", - "from None import _tree", - "from None import _splitter", - "from None 
import _criterion" - ], - "classes": [ - { - "name": "BaseDecisionTree", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the depth of the decision tree.\n\nThe depth of a tree is the maximum distance between the root\nand any leaf.\n\nReturns\n-------\nself.tree_.max_depth : int\n The maximum depth of the tree." - }, - { - "name": "get_n_leaves", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the number of leaves of the decision tree.\n\nReturns\n-------\nself.tree_.n_leaves : int\n Number of leaves." - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_X_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate the training data on predict (probabilities)." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Allow to bypass several input checking. Don't use this parameter unless you know what you do." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class or regression value for X.\n\nFor a classification model, the predicted class for each sample in X is\nreturned. 
For a regression model, the predicted value based on X is\nreturned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nReturns\n-------\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The predicted classes, or the predict values." - }, - { - "name": "apply", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Allow to bypass several input checking. Don't use this parameter unless you know what you do." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the index of the leaf that each sample is predicted as.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nReturns\n-------\nX_leaves : array-like of shape (n_samples,)\n For each datapoint x in X, return the index of the leaf x\n ends up in. Leaves are numbered within\n ``[0; self.tree_.node_count)``, possibly with gaps in the\n numbering." 
- }, - { - "name": "decision_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Allow to bypass several input checking. Don't use this parameter unless you know what you do." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Return the decision path in the tree.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nReturns\n-------\nindicator : sparse matrix of shape (n_samples, n_nodes)\n Return a node indicator CSR matrix where non zero elements\n indicates that the samples goes through the nodes." - }, - { - "name": "_prune_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Prune tree using Minimal Cost-Complexity Pruning." - }, - { - "name": "cost_complexity_pruning_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csc_matrix``." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels) as integers or strings." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Splits that would create child nodes with net zero or negative weight are ignored while searching for a split in each node. Splits are also ignored if they would result in any single class carrying a negative weight in either child node." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the pruning path during Minimal Cost-Complexity Pruning.\n\nSee :ref:`minimal_cost_complexity_pruning` for details on the pruning\nprocess.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csc_matrix``.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels) as integers or strings.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. Splits are also\n ignored if they would result in any single class carrying a\n negative weight in either child node.\n\nReturns\n-------\nccp_path : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n ccp_alphas : ndarray\n Effective alphas of subtree during pruning.\n\n impurities : ndarray\n Sum of the impurities of the subtree leaves for the\n corresponding alpha value in ``ccp_alphas``." 
- }, - { - "name": "feature_importances_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the feature importances.\n\nThe importance of a feature is computed as the (normalized) total\nreduction of the criterion brought by that feature.\nIt is also known as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). See\n:func:`sklearn.inspection.permutation_importance` as an alternative.\n\nReturns\n-------\nfeature_importances_ : ndarray of shape (n_features,)\n Normalized total reduction of criteria by feature\n (Gini importance)." - } - ], - "docstring": "Base class for decision trees.\n\nWarning: This class should not be used directly.\nUse derived classes instead." - }, - { - "name": "DecisionTreeClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "criterion", - "type": "Literal[\"gini\", \"entropy\"]", - "hasDefault": true, - "default": "\"gini\"", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are \"gini\" for the Gini impurity and \"entropy\" for the information gain." - }, - { - "name": "splitter", - "type": "Literal[\"best\", \"random\"]", - "hasDefault": true, - "default": "\"best\"", - "limitation": null, - "ignored": false, - "docstring": "The strategy used to choose the split at each node. Supported strategies are \"best\" to choose the best split and \"random\" to choose the best random split." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." 
- }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." - }, - { - "name": "max_features", - "type": "Union[Literal[\"auto\", \"sqrt\", \"log2\"], int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. 
- If float, then `max_features` is a fraction and `int(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=sqrt(n_features)`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of the estimator. The features are always randomly permuted at each split, even if ``splitter`` is set to ``\"best\"``. When ``max_features < n_features``, the algorithm will select ``max_features`` at random at each split before finding the best split among them. But the best found split may vary across different runs, even if ``max_features=n_features``. That is the case, if the improvement of the criterion is identical for several splits and one split has to be selected at random. To obtain a deterministic behaviour during fitting, ``random_state`` has to be fixed to an integer. See :term:`Glossary ` for details." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow a tree with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. 
The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "class_weight", - "type": "Union[Dict, List[Dict], Literal[\"balanced\"]]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If None, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. Note that for multioutput (including multilabel) weights should be defined for each class of every column in its own dict. For example, for four-class multilabel classification weights should be [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of [{1:1}, {2:5}, {3:1}, {4:1}]. 
The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` For multi-output, the weights of each column of y will be multiplied. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified." - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csc_matrix``." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels) as integers or strings." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Splits that would create child nodes with net zero or negative weight are ignored while searching for a split in each node. Splits are also ignored if they would result in any single class carrying a negative weight in either child node." 
- }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Allow to bypass several input checking. Don't use this parameter unless you know what you do." - }, - { - "name": "X_idx_sorted", - "type": null, - "hasDefault": true, - "default": "\"deprecated\"", - "limitation": null, - "ignored": false, - "docstring": "This parameter is deprecated and has no effect. It will be removed in 1.1 (renaming of 0.26). .. deprecated :: 0.24" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a decision tree classifier from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csc_matrix``.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels) as integers or strings.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. Splits are also\n ignored if they would result in any single class carrying a\n negative weight in either child node.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nX_idx_sorted : deprecated, default=\"deprecated\"\n This parameter is deprecated and has no effect.\n It will be removed in 1.1 (renaming of 0.26).\n\n .. deprecated :: 0.24\n\nReturns\n-------\nself : DecisionTreeClassifier\n Fitted estimator." 
- }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Allow to bypass several input checking. Don't use this parameter unless you know what you do." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities of the input samples X.\n\nThe predicted class probability is the fraction of samples of the same\nclass in a leaf.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nReturns\n-------\nproba : ndarray of shape (n_samples, n_classes) or list of n_outputs such arrays if n_outputs > 1\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class log-probabilities of the input samples X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nproba : ndarray of shape (n_samples, n_classes) or list of n_outputs such arrays if n_outputs > 1\n The class log-probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - } - ], - "docstring": "A decision tree classifier.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncriterion : {\"gini\", \"entropy\"}, default=\"gini\"\n The function to measure the quality of a split. Supported criteria are\n \"gini\" for the Gini impurity and \"entropy\" for the information gain.\n\nsplitter : {\"best\", \"random\"}, default=\"best\"\n The strategy used to choose the split at each node. Supported\n strategies are \"best\" to choose the best split and \"random\" to choose\n the best random split.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. 
versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : int, float or {\"auto\", \"sqrt\", \"log2\"}, default=None\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the estimator. The features are always\n randomly permuted at each split, even if ``splitter`` is set to\n ``\"best\"``. 
When ``max_features < n_features``, the algorithm will\n select ``max_features`` at random at each split before finding the best\n split among them. But the best found split may vary across different\n runs, even if ``max_features=n_features``. That is the case, if the\n improvement of the criterion is identical for several splits and one\n split has to be selected at random. To obtain a deterministic behaviour\n during fitting, ``random_state`` has to be fixed to an integer.\n See :term:`Glossary ` for details.\n\nmax_leaf_nodes : int, default=None\n Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=0\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. 
The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nclass_weight : dict, list of dict or \"balanced\", default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If None, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n For multi-output, the weights of each column of y will be multiplied.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,) or list of ndarray\n The classes labels (single output problem),\n or a list of arrays of class labels (multi-output problem).\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. 
It is also\n known as the Gini importance [4]_.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nmax_features_ : int\n The inferred value of max_features.\n\nn_classes_ : int or list of int\n The number of classes (for single output problems),\n or a list containing the number of classes for each\n output (for multi-output problems).\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n The underlying Tree object. Please refer to\n ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n for basic usage of these attributes.\n\nSee Also\n--------\nDecisionTreeRegressor : A decision tree regressor.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nThe :meth:`predict` method operates using the :func:`numpy.argmax`\nfunction on the outputs of :meth:`predict_proba`. This means that in\ncase the highest predicted probabilities are tied, the classifier will\npredict the tied class with the lowest index in :term:`classes_`.\n\nReferences\n----------\n\n.. [1] https://en.wikipedia.org/wiki/Decision_tree_learning\n\n.. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, \"Classification\n and Regression Trees\", Wadsworth, Belmont, CA, 1984.\n\n.. [3] T. Hastie, R. Tibshirani and J. Friedman. \"Elements of Statistical\n Learning\", Springer, 2009.\n\n.. [4] L. Breiman, and A. 
Cutler, \"Random Forests\",\n https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import cross_val_score\n>>> from sklearn.tree import DecisionTreeClassifier\n>>> clf = DecisionTreeClassifier(random_state=0)\n>>> iris = load_iris()\n>>> cross_val_score(clf, iris.data, iris.target, cv=10)\n... # doctest: +SKIP\n...\narray([ 1. , 0.93..., 0.86..., 0.93..., 0.93...,\n 0.93..., 0.93..., 1. , 0.93..., 1. ])" - }, - { - "name": "DecisionTreeRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "criterion", - "type": "Literal[\"mse\", \"friedman_mse\", \"mae\", \"poisson\"]", - "hasDefault": true, - "default": "\"mse\"", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are \"mse\" for the mean squared error, which is equal to variance reduction as feature selection criterion and minimizes the L2 loss using the mean of each terminal node, \"friedman_mse\", which uses mean squared error with Friedman's improvement score for potential splits, \"mae\" for the mean absolute error, which minimizes the L1 loss using the median of each terminal node, and \"poisson\" which uses reduction in Poisson deviance to find splits. .. versionadded:: 0.18 Mean Absolute Error (MAE) criterion. .. versionadded:: 0.24 Poisson deviance criterion." - }, - { - "name": "splitter", - "type": "Literal[\"best\", \"random\"]", - "hasDefault": true, - "default": "\"best\"", - "limitation": null, - "ignored": false, - "docstring": "The strategy used to choose the split at each node. Supported strategies are \"best\" to choose the best split and \"random\" to choose the best random split." 
- }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." 
- }, - { - "name": "max_features", - "type": "Union[Literal[\"auto\", \"sqrt\", \"log2\"], int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `int(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=n_features`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of the estimator. The features are always randomly permuted at each split, even if ``splitter`` is set to ``\"best\"``. When ``max_features < n_features``, the algorithm will select ``max_features`` at random at each split before finding the best split among them. But the best found split may vary across different runs, even if ``max_features=n_features``. That is the case, if the improvement of the criterion is identical for several splits and one split has to be selected at random. To obtain a deterministic behaviour during fitting, ``random_state`` has to be fixed to an integer. See :term:`Glossary ` for details." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow a tree with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." 
- }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csc_matrix``." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (real numbers). Use ``dtype=np.float64`` and ``order='C'`` for maximum efficiency." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Splits that would create child nodes with net zero or negative weight are ignored while searching for a split in each node." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Allow to bypass several input checking. Don't use this parameter unless you know what you do." - }, - { - "name": "X_idx_sorted", - "type": null, - "hasDefault": true, - "default": "\"deprecated\"", - "limitation": null, - "ignored": false, - "docstring": "This parameter is deprecated and has no effect. It will be removed in 1.1 (renaming of 0.26). .. deprecated :: 0.24" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a decision tree regressor from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. 
Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csc_matrix``.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (real numbers). Use ``dtype=np.float64`` and\n ``order='C'`` for maximum efficiency.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nX_idx_sorted : deprecated, default=\"deprecated\"\n This parameter is deprecated and has no effect.\n It will be removed in 1.1 (renaming of 0.26).\n\n .. deprecated :: 0.24\n\nReturns\n-------\nself : DecisionTreeRegressor\n Fitted estimator." - }, - { - "name": "_compute_partial_dependence_recursion", - "decorators": [], - "parameters": [ - { - "name": "grid", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The grid points on which the partial dependence should be evaluated." - }, - { - "name": "target_features", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of target features for which the partial dependence should be evaluated." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fast partial dependence computation.\n\nParameters\n----------\ngrid : ndarray of shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\ntarget_features : ndarray of shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\n\nReturns\n-------\naveraged_predictions : ndarray of shape (n_samples,)\n The value of the partial dependence function on each grid point." - } - ], - "docstring": "A decision tree regressor.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncriterion : {\"mse\", \"friedman_mse\", \"mae\", \"poisson\"}, default=\"mse\"\n The function to measure the quality of a split. Supported criteria\n are \"mse\" for the mean squared error, which is equal to variance\n reduction as feature selection criterion and minimizes the L2 loss\n using the mean of each terminal node, \"friedman_mse\", which uses mean\n squared error with Friedman's improvement score for potential splits,\n \"mae\" for the mean absolute error, which minimizes the L1 loss using\n the median of each terminal node, and \"poisson\" which uses reduction in\n Poisson deviance to find splits.\n\n .. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\n .. versionadded:: 0.24\n Poisson deviance criterion.\n\nsplitter : {\"best\", \"random\"}, default=\"best\"\n The strategy used to choose the split at each node. Supported\n strategies are \"best\" to choose the best split and \"random\" to choose\n the best random split.\n\nmax_depth : int, default=None\n The maximum depth of the tree. 
If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. 
Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : int, float or {\"auto\", \"sqrt\", \"log2\"}, default=None\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the estimator. The features are always\n randomly permuted at each split, even if ``splitter`` is set to\n ``\"best\"``. When ``max_features < n_features``, the algorithm will\n select ``max_features`` at random at each split before finding the best\n split among them. But the best found split may vary across different\n runs, even if ``max_features=n_features``. That is the case, if the\n improvement of the criterion is identical for several splits and one\n split has to be selected at random. 
To obtain a deterministic behaviour\n during fitting, ``random_state`` has to be fixed to an integer.\n See :term:`Glossary ` for details.\n\nmax_leaf_nodes : int, default=None\n Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=0\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. 
versionadded:: 0.22\n\nAttributes\n----------\nfeature_importances_ : ndarray of shape (n_features,)\n The feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the\n (normalized) total reduction of the criterion brought\n by that feature. It is also known as the Gini importance [4]_.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nmax_features_ : int\n The inferred value of max_features.\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n The underlying Tree object. Please refer to\n ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n for basic usage of these attributes.\n\nSee Also\n--------\nDecisionTreeClassifier : A decision tree classifier.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n\n.. [1] https://en.wikipedia.org/wiki/Decision_tree_learning\n\n.. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, \"Classification\n and Regression Trees\", Wadsworth, Belmont, CA, 1984.\n\n.. [3] T. Hastie, R. Tibshirani and J. Friedman. \"Elements of Statistical\n Learning\", Springer, 2009.\n\n.. [4] L. Breiman, and A. 
Cutler, \"Random Forests\",\n https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.model_selection import cross_val_score\n>>> from sklearn.tree import DecisionTreeRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> regressor = DecisionTreeRegressor(random_state=0)\n>>> cross_val_score(regressor, X, y, cv=10)\n... # doctest: +SKIP\n...\narray([-0.39..., -0.46..., 0.02..., 0.06..., -0.50...,\n 0.16..., 0.11..., -0.73..., -0.30..., -0.00...])" - }, - { - "name": "ExtraTreeClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "criterion", - "type": "Literal[\"gini\", \"entropy\"]", - "hasDefault": true, - "default": "\"gini\"", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are \"gini\" for the Gini impurity and \"entropy\" for the information gain." - }, - { - "name": "splitter", - "type": "Literal[\"random\", \"best\"]", - "hasDefault": true, - "default": "\"random\"", - "limitation": null, - "ignored": false, - "docstring": "The strategy used to choose the split at each node. Supported strategies are \"best\" to choose the best split and \"random\" to choose the best random split." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. 
- If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." - }, - { - "name": "max_features", - "type": "Union[Literal[\"auto\", \"sqrt\", \"log2\"], int]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `int(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=sqrt(n_features)`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. 
Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to pick randomly the `max_features` used at each split. See :term:`Glossary ` for details." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow a tree with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. 
The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "class_weight", - "type": "Union[Dict, List[Dict], Literal[\"balanced\"]]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If None, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. Note that for multioutput (including multilabel) weights should be defined for each class of every column in its own dict. For example, for four-class multilabel classification weights should be [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of [{1:1}, {2:5}, {3:1}, {4:1}]. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` For multi-output, the weights of each column of y will be multiplied. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified." - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "An extremely randomized tree classifier.\n\nExtra-trees differ from classic decision trees in the way they are built.\nWhen looking for the best split to separate the samples of a node into two\ngroups, random splits are drawn for each of the `max_features` randomly\nselected features and the best split among those is chosen. When\n`max_features` is set 1, this amounts to building a totally random\ndecision tree.\n\nWarning: Extra-trees should only be used within ensemble methods.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncriterion : {\"gini\", \"entropy\"}, default=\"gini\"\n The function to measure the quality of a split. Supported criteria are\n \"gini\" for the Gini impurity and \"entropy\" for the information gain.\n\nsplitter : {\"random\", \"best\"}, default=\"random\"\n The strategy used to choose the split at each node. Supported\n strategies are \"best\" to choose the best split and \"random\" to choose\n the best random split.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. 
This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : int, float, {\"auto\", \"sqrt\", \"log2\"} or None, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n Used to pick randomly the `max_features` used at each split.\n See :term:`Glossary ` for details.\n\nmax_leaf_nodes : int, default=None\n Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - 
N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nclass_weight : dict, list of dict or \"balanced\", default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If None, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n For multi-output, the weights of each column of y will be multiplied.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. 
The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,) or list of ndarray\n The classes labels (single output problem),\n or a list of arrays of class labels (multi-output problem).\n\nmax_features_ : int\n The inferred value of max_features.\n\nn_classes_ : int or list of int\n The number of classes (for single output problems),\n or a list containing the number of classes for each\n output (for multi-output problems).\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n The underlying Tree object. Please refer to\n ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n for basic usage of these attributes.\n\nSee Also\n--------\nExtraTreeRegressor : An extremely randomized tree regressor.\nsklearn.ensemble.ExtraTreesClassifier : An extra-trees classifier.\nsklearn.ensemble.ExtraTreesRegressor : An extra-trees regressor.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) 
lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.ensemble import BaggingClassifier\n>>> from sklearn.tree import ExtraTreeClassifier\n>>> X, y = load_iris(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, random_state=0)\n>>> extra_tree = ExtraTreeClassifier(random_state=0)\n>>> cls = BaggingClassifier(extra_tree, random_state=0).fit(\n... X_train, y_train)\n>>> cls.score(X_test, y_test)\n0.8947..." - }, - { - "name": "ExtraTreeRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "criterion", - "type": "Literal[\"mse\", \"friedman_mse\", \"mae\"]", - "hasDefault": true, - "default": "\"mse\"", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are \"mse\" for the mean squared error, which is equal to variance reduction as feature selection criterion and \"mae\" for the mean absolute error. .. versionadded:: 0.18 Mean Absolute Error (MAE) criterion. .. versionadded:: 0.24 Poisson deviance criterion." - }, - { - "name": "splitter", - "type": "Literal[\"random\", \"best\"]", - "hasDefault": true, - "default": "\"random\"", - "limitation": null, - "ignored": false, - "docstring": "The strategy used to choose the split at each node. Supported strategies are \"best\" to choose the best split and \"random\" to choose the best random split." 
- }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." 
- }, - { - "name": "max_features", - "type": "Union[Literal[\"auto\", \"sqrt\", \"log2\"], int]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `int(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=n_features`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to pick randomly the `max_features` used at each split. See :term:`Glossary ` for details." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. 
versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow a tree with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "An extremely randomized tree regressor.\n\nExtra-trees differ from classic decision trees in the way they are built.\nWhen looking for the best split to separate the samples of a node into two\ngroups, random splits are drawn for each of the `max_features` randomly\nselected features and the best split among those is chosen. 
When\n`max_features` is set 1, this amounts to building a totally random\ndecision tree.\n\nWarning: Extra-trees should only be used within ensemble methods.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncriterion : {\"mse\", \"friedman_mse\", \"mae\"}, default=\"mse\"\n The function to measure the quality of a split. Supported criteria\n are \"mse\" for the mean squared error, which is equal to variance\n reduction as feature selection criterion and \"mae\" for the mean\n absolute error.\n\n .. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\n .. versionadded:: 0.24\n Poisson deviance criterion.\n\nsplitter : {\"random\", \"best\"}, default=\"random\"\n The strategy used to choose the split at each node. Supported\n strategies are \"best\" to choose the best split and \"random\" to choose\n the best random split.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. 
This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : int, float, {\"auto\", \"sqrt\", \"log2\"} or None, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n Used to pick randomly the `max_features` used at each split.\n See :term:`Glossary ` for details.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the 
number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nmax_leaf_nodes : int, default=None\n Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nmax_features_ : int\n The inferred value of max_features.\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nfeature_importances_ : ndarray of shape (n_features,)\n Return impurity-based feature importances (the higher, the more\n important the feature).\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n The underlying Tree object. 
Please refer to\n ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n for basic usage of these attributes.\n\nSee Also\n--------\nExtraTreeClassifier : An extremely randomized tree classifier.\nsklearn.ensemble.ExtraTreesClassifier : An extra-trees classifier.\nsklearn.ensemble.ExtraTreesRegressor : An extra-trees regressor.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.ensemble import BaggingRegressor\n>>> from sklearn.tree import ExtraTreeRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, random_state=0)\n>>> extra_tree = ExtraTreeRegressor(random_state=0)\n>>> reg = BaggingRegressor(extra_tree, random_state=0).fit(\n... X_train, y_train)\n>>> reg.score(X_test, y_test)\n0.33..." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.tree._export", - "imports": [ - "from io import StringIO", - "from numbers import Integral", - "import numpy as np", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from base import is_classifier", - "from None import _criterion", - "from None import _tree", - "from _reingold_tilford import buchheim", - "from _reingold_tilford import Tree", - "from None import DecisionTreeClassifier", - "import warnings", - "import matplotlib.pyplot as plt", - "from matplotlib.text import Annotation" - ], - "classes": [ - { - "name": "Sentinel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "_BaseTreeExporter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_color", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_fill_color", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "node_to_str", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "_DOTTreeExporter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "export", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "tail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "head", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "recurse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "_MPLTreeExporter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "export", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "recurse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "_color_brew", - "decorators": [], - "parameters": [ - { - "name": "n", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of colors required." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate n colors with equally spaced hues.\n\nParameters\n----------\nn : int\n The number of colors required.\n\nReturns\n-------\ncolor_list : list, length n\n List of n tuples of form (R, G, B) being the components of each color." 
- }, - { - "name": "plot_tree", - "decorators": [], - "parameters": [ - { - "name": "decision_tree", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The decision tree to be plotted." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the representation. If None, the tree is fully generated." - }, - { - "name": "feature_names", - "type": "List", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Names of each of the features. If None, generic names will be used (\"X[0]\", \"X[1]\", ...)." - }, - { - "name": "class_names", - "type": "Union[List[str], bool]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Names of each of the target classes in ascending numerical order. Only relevant for classification and not supported for multi-output. If ``True``, shows a symbolic representation of the class name." - }, - { - "name": "label", - "type": "Literal['all', 'root', 'none']", - "hasDefault": true, - "default": "'all'", - "limitation": null, - "ignored": false, - "docstring": "Whether to show informative labels for impurity, etc. Options include 'all' to show at every node, 'root' to show only at the top root node, or 'none' to not show at any node." - }, - { - "name": "filled", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, paint nodes to indicate majority class for classification, extremity of values for regression, or purity of node for multi-output." - }, - { - "name": "impurity", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, show the impurity at each node." 
- }, - { - "name": "node_ids", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, show the ID number on each node." - }, - { - "name": "proportion", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, change the display of 'values' and/or 'samples' to be proportions and percentages respectively." - }, - { - "name": "rotate", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter has no effect on the matplotlib tree visualisation and it is kept here for backward compatibility. .. deprecated:: 0.23 ``rotate`` is deprecated in 0.23 and will be removed in 1.0 (renaming of 0.25)." - }, - { - "name": "rounded", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, draw node boxes with rounded corners and use Helvetica fonts instead of Times-Roman." - }, - { - "name": "precision", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of digits of precision for floating point in the values of impurity, threshold and value attributes of each node." - }, - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes to plot to. If None, use current axis. Any previous content is cleared." - }, - { - "name": "fontsize", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Size of text font. If None, determined automatically to fit figure." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Plot a decision tree.\n\nThe sample counts that are shown are weighted with any sample_weights that\nmight be present.\n\nThe visualization is fit automatically to the size of the axis.\nUse the ``figsize`` or ``dpi`` arguments of ``plt.figure`` to control\nthe size of the rendering.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\nParameters\n----------\ndecision_tree : decision tree regressor or classifier\n The decision tree to be plotted.\n\nmax_depth : int, default=None\n The maximum depth of the representation. If None, the tree is fully\n generated.\n\nfeature_names : list of strings, default=None\n Names of each of the features.\n If None, generic names will be used (\"X[0]\", \"X[1]\", ...).\n\nclass_names : list of str or bool, default=None\n Names of each of the target classes in ascending numerical order.\n Only relevant for classification and not supported for multi-output.\n If ``True``, shows a symbolic representation of the class name.\n\nlabel : {'all', 'root', 'none'}, default='all'\n Whether to show informative labels for impurity, etc.\n Options include 'all' to show at every node, 'root' to show only at\n the top root node, or 'none' to not show at any node.\n\nfilled : bool, default=False\n When set to ``True``, paint nodes to indicate majority class for\n classification, extremity of values for regression, or purity of node\n for multi-output.\n\nimpurity : bool, default=True\n When set to ``True``, show the impurity at each node.\n\nnode_ids : bool, default=False\n When set to ``True``, show the ID number on each node.\n\nproportion : bool, default=False\n When set to ``True``, change the display of 'values' and/or 'samples'\n to be proportions and percentages respectively.\n\nrotate : bool, default=False\n This parameter has no effect on the matplotlib tree visualisation and\n it is kept here for backward compatibility.\n\n .. 
deprecated:: 0.23\n ``rotate`` is deprecated in 0.23 and will be removed in 1.0\n (renaming of 0.25).\n\nrounded : bool, default=False\n When set to ``True``, draw node boxes with rounded corners and use\n Helvetica fonts instead of Times-Roman.\n\nprecision : int, default=3\n Number of digits of precision for floating point in the values of\n impurity, threshold and value attributes of each node.\n\nax : matplotlib axis, default=None\n Axes to plot to. If None, use current axis. Any previous content\n is cleared.\n\nfontsize : int, default=None\n Size of text font. If None, determined automatically to fit figure.\n\nReturns\n-------\nannotations : list of artists\n List containing the artists for the annotation boxes making up the\n tree.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn import tree\n\n>>> clf = tree.DecisionTreeClassifier(random_state=0)\n>>> iris = load_iris()\n\n>>> clf = clf.fit(iris.data, iris.target)\n>>> tree.plot_tree(clf) # doctest: +SKIP\n[Text(251.5,345.217,'X[3] <= 0.8..." - }, - { - "name": "export_graphviz", - "decorators": [], - "parameters": [ - { - "name": "decision_tree", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The decision tree to be exported to GraphViz." - }, - { - "name": "out_file", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Handle or name of the output file. If ``None``, the result is returned as a string. .. versionchanged:: 0.20 Default of out_file changed from \"tree.dot\" to None." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the representation. If None, the tree is fully generated." 
- }, - { - "name": "feature_names", - "type": "List[str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Names of each of the features. If None generic names will be used (\"feature_0\", \"feature_1\", ...)." - }, - { - "name": "class_names", - "type": "Union[List[str], bool]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Names of each of the target classes in ascending numerical order. Only relevant for classification and not supported for multi-output. If ``True``, shows a symbolic representation of the class name." - }, - { - "name": "label", - "type": "Literal['all', 'root', 'none']", - "hasDefault": true, - "default": "'all'", - "limitation": null, - "ignored": false, - "docstring": "Whether to show informative labels for impurity, etc. Options include 'all' to show at every node, 'root' to show only at the top root node, or 'none' to not show at any node." - }, - { - "name": "filled", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, paint nodes to indicate majority class for classification, extremity of values for regression, or purity of node for multi-output." - }, - { - "name": "leaves_parallel", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, draw all leaf nodes at the bottom of the tree." - }, - { - "name": "impurity", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, show the impurity at each node." - }, - { - "name": "node_ids", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, show the ID number on each node." 
- }, - { - "name": "proportion", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, change the display of 'values' and/or 'samples' to be proportions and percentages respectively." - }, - { - "name": "rotate", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, orient tree left to right rather than top-down." - }, - { - "name": "rounded", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, draw node boxes with rounded corners and use Helvetica fonts instead of Times-Roman." - }, - { - "name": "special_characters", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``False``, ignore special characters for PostScript compatibility." - }, - { - "name": "precision", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of digits of precision for floating point in the values of impurity, threshold and value attributes of each node." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Export a decision tree in DOT format.\n\nThis function generates a GraphViz representation of the decision tree,\nwhich is then written into `out_file`. Once exported, graphical renderings\ncan be generated using, for example::\n\n $ dot -Tps tree.dot -o tree.ps (PostScript format)\n $ dot -Tpng tree.dot -o tree.png (PNG format)\n\nThe sample counts that are shown are weighted with any sample_weights that\nmight be present.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndecision_tree : decision tree classifier\n The decision tree to be exported to GraphViz.\n\nout_file : object or str, default=None\n Handle or name of the output file. 
If ``None``, the result is\n returned as a string.\n\n .. versionchanged:: 0.20\n Default of out_file changed from \"tree.dot\" to None.\n\nmax_depth : int, default=None\n The maximum depth of the representation. If None, the tree is fully\n generated.\n\nfeature_names : list of str, default=None\n Names of each of the features.\n If None generic names will be used (\"feature_0\", \"feature_1\", ...).\n\nclass_names : list of str or bool, default=None\n Names of each of the target classes in ascending numerical order.\n Only relevant for classification and not supported for multi-output.\n If ``True``, shows a symbolic representation of the class name.\n\nlabel : {'all', 'root', 'none'}, default='all'\n Whether to show informative labels for impurity, etc.\n Options include 'all' to show at every node, 'root' to show only at\n the top root node, or 'none' to not show at any node.\n\nfilled : bool, default=False\n When set to ``True``, paint nodes to indicate majority class for\n classification, extremity of values for regression, or purity of node\n for multi-output.\n\nleaves_parallel : bool, default=False\n When set to ``True``, draw all leaf nodes at the bottom of the tree.\n\nimpurity : bool, default=True\n When set to ``True``, show the impurity at each node.\n\nnode_ids : bool, default=False\n When set to ``True``, show the ID number on each node.\n\nproportion : bool, default=False\n When set to ``True``, change the display of 'values' and/or 'samples'\n to be proportions and percentages respectively.\n\nrotate : bool, default=False\n When set to ``True``, orient tree left to right rather than top-down.\n\nrounded : bool, default=False\n When set to ``True``, draw node boxes with rounded corners and use\n Helvetica fonts instead of Times-Roman.\n\nspecial_characters : bool, default=False\n When set to ``False``, ignore special characters for PostScript\n compatibility.\n\nprecision : int, default=3\n Number of digits of precision for floating point in the 
values of\n impurity, threshold and value attributes of each node.\n\nReturns\n-------\ndot_data : string\n String representation of the input tree in GraphViz dot format.\n Only returned if ``out_file`` is None.\n\n .. versionadded:: 0.18\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn import tree\n\n>>> clf = tree.DecisionTreeClassifier()\n>>> iris = load_iris()\n\n>>> clf = clf.fit(iris.data, iris.target)\n>>> tree.export_graphviz(clf)\n'digraph Tree {..." - }, - { - "name": "_compute_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the depth of the subtree rooted in node." - }, - { - "name": "export_text", - "decorators": [], - "parameters": [ - { - "name": "decision_tree", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The decision tree estimator to be exported. It can be an instance of DecisionTreeClassifier or DecisionTreeRegressor." - }, - { - "name": "feature_names", - "type": "List[str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A list of length n_features containing the feature names. If None generic names will be used (\"feature_0\", \"feature_1\", ...)." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Only the first max_depth levels of the tree are exported. Truncated branches will be marked with \"...\"." - }, - { - "name": "spacing", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of spaces between edges. The higher it is, the wider the result." - }, - { - "name": "decimals", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of decimal digits to display." 
- }, - { - "name": "show_weights", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true the classification weights will be exported on each leaf. The classification weights are the number of samples each class." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Build a text report showing the rules of a decision tree.\n\nNote that backwards compatibility may not be supported.\n\nParameters\n----------\ndecision_tree : object\n The decision tree estimator to be exported.\n It can be an instance of\n DecisionTreeClassifier or DecisionTreeRegressor.\n\nfeature_names : list of str, default=None\n A list of length n_features containing the feature names.\n If None generic names will be used (\"feature_0\", \"feature_1\", ...).\n\nmax_depth : int, default=10\n Only the first max_depth levels of the tree are exported.\n Truncated branches will be marked with \"...\".\n\nspacing : int, default=3\n Number of spaces between edges. 
The higher it is, the wider the result.\n\ndecimals : int, default=2\n Number of decimal digits to display.\n\nshow_weights : bool, default=False\n If true the classification weights will be exported on each leaf.\n The classification weights are the number of samples each class.\n\nReturns\n-------\nreport : string\n Text summary of all the rules in the decision tree.\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.tree import DecisionTreeClassifier\n>>> from sklearn.tree import export_text\n>>> iris = load_iris()\n>>> X = iris['data']\n>>> y = iris['target']\n>>> decision_tree = DecisionTreeClassifier(random_state=0, max_depth=2)\n>>> decision_tree = decision_tree.fit(X, y)\n>>> r = export_text(decision_tree, feature_names=iris['feature_names'])\n>>> print(r)\n|--- petal width (cm) <= 0.80\n| |--- class: 0\n|--- petal width (cm) > 0.80\n| |--- petal width (cm) <= 1.75\n| | |--- class: 1\n| |--- petal width (cm) > 1.75\n| | |--- class: 2" - } - ] - }, - { - "name": "sklearn.tree._reingold_tilford", - "imports": [ - "import numpy as np" - ], - "classes": [ - { - "name": "DrawTree", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "left", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "right", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "lbrother", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_lmost_sibling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__str__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "max_extents", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "Tree", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "buchheim", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "third_walk", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "first_walk", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "apportion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "move_subtree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "execute_shifts", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "ancestor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "second_walk", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tree", - "imports": [ - "from _classes import BaseDecisionTree", - "from _classes import DecisionTreeClassifier", - "from _classes import DecisionTreeRegressor", - "from _classes import 
ExtraTreeClassifier", - "from _classes import ExtraTreeRegressor", - "from _export import export_graphviz", - "from _export import plot_tree", - "from _export import export_text" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.tree.tests.test_export", - "imports": [ - "from re import finditer", - "from re import search", - "from textwrap import dedent", - "from numpy.random import RandomState", - "import pytest", - "from sklearn.base import is_classifier", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.ensemble import GradientBoostingClassifier", - "from sklearn.tree import export_graphviz", - "from sklearn.tree import plot_tree", - "from sklearn.tree import export_text", - "from io import StringIO", - "from sklearn.exceptions import NotFittedError" - ], - "classes": [], - "functions": [ - { - "name": "test_graphviz_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_graphviz_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_friedman_mse_in_graphviz", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_export_text_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_export_text", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_tree_entropy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_tree_gini", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_tree_rotate_deprecation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_not_fitted_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tree.tests.test_reingold_tilford", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.tree._reingold_tilford import buchheim", - "from sklearn.tree._reingold_tilford import Tree" - ], - "classes": [], - "functions": [ - { - "name": "test_buchheim", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tree.tests.test_tree", - "imports": [ - "import copy", - "import pickle", - "from itertools import product", - "import struct", - "import pytest", - "import numpy as np", - "from numpy.testing import assert_allclose", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import coo_matrix", - "from sklearn.random_projection import _sparse_random_matrix", - "from sklearn.dummy import DummyRegressor", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import mean_squared_error", - "from sklearn.metrics import mean_poisson_deviance", - "from sklearn.model_selection import train_test_split", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import create_memmap_backed_data", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing 
import skip_if_32bit", - "from sklearn.utils.estimator_checks import check_sample_weights_invariance", - "from sklearn.utils.validation import check_random_state", - "from sklearn.exceptions import NotFittedError", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.tree import ExtraTreeClassifier", - "from sklearn.tree import ExtraTreeRegressor", - "from sklearn import tree", - "from sklearn.tree._tree import TREE_LEAF", - "from sklearn.tree._tree import TREE_UNDEFINED", - "from sklearn.tree._classes import CRITERIA_CLF", - "from sklearn.tree._classes import CRITERIA_REG", - "from sklearn import datasets", - "from sklearn.utils import compute_sample_weight", - "from sklearn.tree._utils import _realloc_test" - ], - "classes": [], - "functions": [ - { - "name": "assert_tree_equal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weighted_classification_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_xor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_diabetes_overfit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_diabetes_underfit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_probability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_arrayrepr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pure_set", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_numerical_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_importances_raises", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_importances_gini_equal_mse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_samples_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test min_samples_split parameter" - }, - { - "name": "test_min_samples_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_min_weight_fraction_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test if leaves contain at least min_weight_fraction_leaf of the\ntraining set" - }, - { - "name": 
"test_min_weight_fraction_leaf_on_dense_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_weight_fraction_leaf_on_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_min_weight_fraction_leaf_with_min_samples_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test the interaction between min_weight_fraction_leaf and\nmin_samples_leaf when sample_weights is not provided in fit." - }, - { - "name": "test_min_weight_fraction_leaf_with_min_samples_leaf_on_dense_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_weight_fraction_leaf_with_min_samples_leaf_on_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_impurity_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_impurity_decrease", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classes_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unbalanced_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_memory_layout", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check class_weights resemble sample_weights behavior." - }, - { - "name": "test_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_class_weight_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_weight_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_leaf_nodes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_leaf_nodes_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_arrays_persist", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_only_constant_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_behaviour_constant_feature_after_splits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_with_only_one_non_constant_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_big_input", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_realloc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huge_allocations", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_input_reg_trees", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sparse_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sparse_criterion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_explicit_sparse_zeros", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_explicit_sparse_zeros", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_raise_error_on_1d_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_1d_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_min_weight_leaf_split_level", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_min_weight_leaf_split_level", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_weight_leaf_split_level", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_public_apply", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_public_apply_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_public_apply_all_trees", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_public_apply_sparse_trees", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decision_path_hardcoded", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_decision_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decision_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_no_sparse_y_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_sparse_y_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mae", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check MAE criterion produces correct results on small toy 
dataset:\n\n------------------\n| X | y | weight |\n------------------\n| 3 | 3 | 0.1 |\n| 5 | 3 | 0.3 |\n| 8 | 4 | 1.0 |\n| 3 | 6 | 0.6 |\n| 5 | 7 | 0.3 |\n------------------\n|sum wt:| 2.3 |\n------------------\n\nBecause we are dealing with sample weights, we cannot find the median by\nsimply choosing/averaging the centre value(s), instead we consider the\nmedian where 50% of the cumulative weight is found (in a y sorted data set)\n. Therefore with regards to this test data, the cumulative weight is >= 50%\nwhen y = 4. Therefore:\nMedian = 4\n\nFor all the samples, we can get the total error by summing:\nAbsolute(Median - y) * weight\n\nI.e., total error = (Absolute(4 - 3) * 0.1)\n + (Absolute(4 - 3) * 0.3)\n + (Absolute(4 - 4) * 1.0)\n + (Absolute(4 - 6) * 0.6)\n + (Absolute(4 - 7) * 0.3)\n = 2.5\n\nImpurity = Total error / total weight\n = 2.5 / 2.3\n = 1.08695652173913\n ------------------\n\nFrom this root node, the next best split is between X values of 3 and 5.\nThus, we have left and right child nodes:\n\nLEFT RIGHT\n------------------ ------------------\n| X | y | weight | | X | y | weight |\n------------------ ------------------\n| 3 | 3 | 0.1 | | 5 | 3 | 0.3 |\n| 3 | 6 | 0.6 | | 8 | 4 | 1.0 |\n------------------ | 5 | 7 | 0.3 |\n|sum wt:| 0.7 | ------------------\n------------------ |sum wt:| 1.6 |\n ------------------\n\nImpurity is found in the same way:\nLeft node Median = 6\nTotal error = (Absolute(6 - 3) * 0.1)\n + (Absolute(6 - 6) * 0.6)\n = 0.3\n\nLeft Impurity = Total error / total weight\n = 0.3 / 0.7\n = 0.428571428571429\n -------------------\n\nLikewise for Right node:\nRight node Median = 4\nTotal error = (Absolute(4 - 3) * 0.3)\n + (Absolute(4 - 4) * 1.0)\n + (Absolute(4 - 7) * 0.3)\n = 1.2\n\nRight Impurity = Total error / total weight\n = 1.2 / 1.6\n = 0.75\n ------" - }, - { - "name": "test_criterion_copy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_empty_leaf_infinite_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prune_tree_classifier_are_subtrees", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prune_tree_regression_are_subtrees", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prune_single_node_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_pruning_creates_subtree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_is_subtree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prune_tree_raises_negative_ccp_alpha", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_apply_path_readonly", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_apply_path_readonly_all_trees", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_balance_property", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_poisson_zero_nodes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_poisson_vs_mse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decision_tree_regressor_sample_weight_consistentcy", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that the impact of sample_weight is consistent." - }, - { - "name": "test_X_idx_sorted_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tree.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.utils.class_weight", - "imports": [ - "import numpy as np", - "from validation import _deprecate_positional_args", - "from preprocessing import LabelEncoder" - ], - "classes": [], - "functions": [ - { - "name": "compute_class_weight", - "decorators": [], - "parameters": [ - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If 'balanced', class weights will be given by ``n_samples / (n_classes * np.bincount(y))``. If a dictionary is given, keys are classes and values are corresponding class weights. If None is given, the class weights will be uniform." - }, - { - "name": "classes", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of the classes occurring in the data, as given by ``np.unique(y_org)`` with ``y_org`` the original class labels." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of original class labels per sample." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate class weights for unbalanced datasets.\n\nParameters\n----------\nclass_weight : dict, 'balanced' or None\n If 'balanced', class weights will be given by\n ``n_samples / (n_classes * np.bincount(y))``.\n If a dictionary is given, keys are classes and values\n are corresponding class weights.\n If None is given, the class weights will be uniform.\n\nclasses : ndarray\n Array of the classes occurring in the data, as given by\n ``np.unique(y_org)`` with ``y_org`` the original class labels.\n\ny : array-like of shape (n_samples,)\n Array of original class labels per sample.\n\nReturns\n-------\nclass_weight_vect : ndarray of shape (n_classes,)\n Array with class_weight_vect[i] the weight for i-th class.\n\nReferences\n----------\nThe \"balanced\" heuristic is inspired by\nLogistic Regression in Rare Events Data, King, Zen, 2001." - }, - { - "name": "compute_sample_weight", - "decorators": [], - "parameters": [ - { - "name": "class_weight", - "type": "Union[Dict, List, Literal[\"balanced\"]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. Note that for multioutput (including multilabel) weights should be defined for each class of every column in its own dict. For example, for four-class multilabel classification weights should be [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of [{1:1}, {2:5}, {3:1}, {4:1}]. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data: ``n_samples / (n_classes * np.bincount(y))``. For multi-output, the weights of each column of y will be multiplied." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of original class labels per sample." - }, - { - "name": "indices", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of indices to be used in a subsample. Can be of length less than n_samples in the case of a subsample, or equal to n_samples in the case of a bootstrap subsample with repeated indices. If None, the sample weight will be calculated over the full sample. Only \"balanced\" is supported for class_weight if this is provided." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate sample weights by class for unbalanced datasets.\n\nParameters\n----------\nclass_weight : dict, list of dicts, \"balanced\", or None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data:\n ``n_samples / (n_classes * np.bincount(y))``.\n\n For multi-output, the weights of each column of y will be multiplied.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Array of original class labels per sample.\n\nindices : array-like of shape (n_subsample,), default=None\n Array of indices to be used in a subsample. 
Can be of length less than\n n_samples in the case of a subsample, or equal to n_samples in the\n case of a bootstrap subsample with repeated indices. If None, the\n sample weight will be calculated over the full sample. Only \"balanced\"\n is supported for class_weight if this is provided.\n\nReturns\n-------\nsample_weight_vect : ndarray of shape (n_samples,)\n Array with sample weights as applied to the original y." - } - ] - }, - { - "name": "sklearn.utils.deprecation", - "imports": [ - "import warnings", - "import functools" - ], - "classes": [ - { - "name": "deprecated", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "extra", - "type": "str", - "hasDefault": true, - "default": "''", - "limitation": null, - "ignored": false, - "docstring": "To be added to the deprecation messages." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "obj", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call method\n\nParameters\n----------\nobj : object" - }, - { - "name": "_decorate_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_decorate_fun", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decorate function fun" - }, - { - "name": "_decorate_property", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_update_doc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Decorator to mark a function or class as deprecated.\n\nIssue a 
warning when the function is called/the class is instantiated and\nadds a warning to the docstring.\n\nThe optional extra argument will be appended to the deprecation message\nand the docstring. Note: to use this with the default value for extra, put\nin an empty of parentheses:\n\n>>> from sklearn.utils import deprecated\n>>> deprecated()\n\n\n>>> @deprecated()\n... def some_function(): pass\n\nParameters\n----------\nextra : str, default=''\n To be added to the deprecation messages." - } - ], - "functions": [ - { - "name": "_is_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper to check if func is wrapped by our deprecated decorator" - } - ] - }, - { - "name": "sklearn.utils.estimator_checks", - "imports": [ - "import types", - "import warnings", - "import pickle", - "import re", - "from copy import deepcopy", - "from functools import partial", - "from functools import wraps", - "from inspect import signature", - "import numpy as np", - "from scipy import sparse", - "from scipy.stats import rankdata", - "import joblib", - "from None import IS_PYPY", - "from None import config_context", - "from _testing import _get_args", - "from _testing import assert_raise_message", - "from _testing import assert_array_equal", - "from _testing import assert_array_almost_equal", - "from _testing import assert_allclose", - "from _testing import assert_allclose_dense_sparse", - "from _testing import set_random_state", - "from _testing import SkipTest", - "from _testing import ignore_warnings", - "from _testing import create_memmap_backed_data", - "from _testing import raises", - "from None import is_scalar_nan", - "from linear_model import LogisticRegression", - "from linear_model import Ridge", - "from base import clone", - "from base import ClusterMixin", - "from base import is_classifier", - "from base import is_regressor", - "from base import is_outlier_detector", - "from base import RegressorMixin", - 
"from base import _is_pairwise", - "from metrics import accuracy_score", - "from metrics import adjusted_rand_score", - "from metrics import f1_score", - "from random_projection import BaseRandomProjection", - "from feature_selection import SelectKBest", - "from pipeline import make_pipeline", - "from exceptions import DataConversionWarning", - "from exceptions import NotFittedError", - "from exceptions import SkipTestWarning", - "from model_selection import train_test_split", - "from model_selection import ShuffleSplit", - "from model_selection._validation import _safe_split", - "from metrics.pairwise import rbf_kernel", - "from metrics.pairwise import linear_kernel", - "from metrics.pairwise import pairwise_distances", - "from None import shuffle", - "from _tags import _DEFAULT_TAGS", - "from _tags import _safe_tags", - "from validation import has_fit_parameter", - "from validation import _num_samples", - "from preprocessing import StandardScaler", - "from preprocessing import scale", - "from datasets import load_iris", - "from datasets import make_blobs", - "from datasets import make_multilabel_classification", - "from datasets import make_regression", - "import pytest", - "import pandas as pd" - ], - "classes": [ - { - "name": "_NotAnArray", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "data", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__array__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__array_function__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "An object that is convertible to an array.\n\nParameters\n----------\ndata : array-like\n The data." - } - ], - "functions": [ - { - "name": "_yield_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_yield_classifier_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_supervised_y_no_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_yield_regressor_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_yield_transformer_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_yield_clustering_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_yield_outliers_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_yield_all_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_check_estimator_ids", - "decorators": [], - "parameters": [ - { - "name": "obj", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Items generated by 
`check_estimator`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Create pytest ids for checks.\n\nWhen `obj` is an estimator, this returns the pprint version of the\nestimator (with `print_changed_only=True`). When `obj` is a function, the\nname of the function is returned with its keyword arguments.\n\n`_get_check_estimator_ids` is designed to be used as the `id` in\n`pytest.mark.parametrize` where `check_estimator(..., generate_only=True)`\nis yielding estimators and checks.\n\nParameters\n----------\nobj : estimator or function\n Items generated by `check_estimator`.\n\nReturns\n-------\nid : str or None\n\nSee Also\n--------\ncheck_estimator" - }, - { - "name": "_construct_instance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Construct Estimator instance if possible." - }, - { - "name": "_maybe_mark_xfail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_maybe_skip", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_should_be_skipped_or_marked", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "parametrize_with_checks", - "decorators": [], - "parameters": [ - { - "name": "estimators", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimators to generated checks for. .. versionchanged:: 0.24 Passing a class was deprecated in version 0.23, and support for classes was removed in 0.24. Pass an instance instead. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Pytest specific decorator for parametrizing estimator checks.\n\nThe `id` of each check is set to be a pprint version of the estimator\nand the name of the check with its keyword arguments.\nThis allows to use `pytest -k` to specify which tests to run::\n\n pytest test_check_estimators.py -k check_estimators_fit_returns_self\n\nParameters\n----------\nestimators : list of estimators instances\n Estimators to generated checks for.\n\n .. versionchanged:: 0.24\n Passing a class was deprecated in version 0.23, and support for\n classes was removed in 0.24. Pass an instance instead.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ndecorator : `pytest.mark.parametrize`\n\nExamples\n--------\n>>> from sklearn.utils.estimator_checks import parametrize_with_checks\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.tree import DecisionTreeRegressor\n\n>>> @parametrize_with_checks([LogisticRegression(),\n... DecisionTreeRegressor()])\n... def test_sklearn_compatible_estimator(estimator, check):\n... check(estimator)" - }, - { - "name": "check_estimator", - "decorators": [], - "parameters": [ - { - "name": "Estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator instance to check. .. versionchanged:: 0.24 Passing a class was deprecated in version 0.23, and support for classes was removed in 0.24." - }, - { - "name": "generate_only", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When `False`, checks are evaluated when `check_estimator` is called. When `True`, `check_estimator` returns a generator that yields (estimator, check) tuples. The check is run by calling `check(estimator)`. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Check if estimator adheres to scikit-learn conventions.\n\nThis estimator will run an extensive test-suite for input validation,\nshapes, etc, making sure that the estimator complies with `scikit-learn`\nconventions as detailed in :ref:`rolling_your_own_estimator`.\nAdditional tests for classifiers, regressors, clustering or transformers\nwill be run if the Estimator class inherits from the corresponding mixin\nfrom sklearn.base.\n\nSetting `generate_only=True` returns a generator that yields (estimator,\ncheck) tuples where the check can be called independently from each\nother, i.e. `check(estimator)`. This allows all checks to be run\nindependently and report the checks that are failing.\n\nscikit-learn provides a pytest specific decorator,\n:func:`~sklearn.utils.parametrize_with_checks`, making it easier to test\nmultiple estimators.\n\nParameters\n----------\nEstimator : estimator object\n Estimator instance to check.\n\n .. versionchanged:: 0.24\n Passing a class was deprecated in version 0.23, and support for\n classes was removed in 0.24.\n\ngenerate_only : bool, default=False\n When `False`, checks are evaluated when `check_estimator` is called.\n When `True`, `check_estimator` returns a generator that yields\n (estimator, check) tuples. The check is run by calling\n `check(estimator)`.\n\n .. versionadded:: 0.22\n\nReturns\n-------\nchecks_generator : generator\n Generator that yields (estimator, check) tuples. Returned when\n `generate_only=True`." 
- }, - { - "name": "_regression_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_checking_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_pairwise_metric", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator object to test." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns True if estimator accepts pairwise metric.\n\nParameters\n----------\nestimator : object\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if _pairwise is set to True and False otherwise." - }, - { - "name": "_pairwise_estimator_convert_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_generate_sparse_matrix", - "decorators": [], - "parameters": [ - { - "name": "X_csr: CSR Matrix", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input matrix in CSR format." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Generate sparse matrices with {32,64}bit indices of diverse format.\n\nParameters\n----------\nX_csr: CSR Matrix\n Input matrix in CSR format.\n\nReturns\n-------\nout: iter(Matrices)\n In format['dok', 'lil', 'dia', 'bsr', 'csr', 'csc', 'coo',\n 'coo_64', 'csc_64', 'csr_64']" - }, - { - "name": "check_estimator_sparse_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sample_weights_pandas_series", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sample_weights_not_an_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sample_weights_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sample_weights_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sample_weights_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_dtype_object", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_complex_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_dict_unchanged", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_public_parameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_dont_overwrite_parameters", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_fit2d_predict1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_apply_on_subsets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_methods_subset_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_methods_sample_order_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_fit2d_1sample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_fit2d_1feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_fit1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_transformer_general", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_transformer_data_not_an_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_transformers_unfitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pipeline_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"check_fit_score_takes_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_estimators_dtypes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_transformer_preserve_dtypes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_estimators_empty_data_messages", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_estimators_nan_inf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_nonsquare_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that error is thrown when non-square data provided." - }, - { - "name": "check_estimators_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that we can pickle all estimators." 
- }, - { - "name": "check_estimators_partial_fit_n_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_classifier_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_regressor_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_clustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_clusterer_compute_labels_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that predict is invariant of compute_labels." - }, - { - "name": "check_classifiers_one_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_classifiers_train", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_outlier_corruption", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_outliers_train", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_classifiers_multilabel_representation_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_estimators_fit_returns_self", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if self is returned when calling fit." 
- }, - { - "name": "check_estimators_unfitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that predict raises an exception in an unfitted estimator.\n\nUnfitted estimators should raise a NotFittedError." - }, - { - "name": "check_supervised_y_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_classifiers_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_choose_check_classifiers_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_classifiers_classes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_regressors_int", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_regressors_train", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_regressors_no_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_class_weight_classifiers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_class_weight_balanced_classifiers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_class_weight_balanced_linear_classifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test class weights with non-contiguous class labels." 
- }, - { - "name": "check_estimators_overwrite_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_no_attributes_set_in_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check setting during init." - }, - { - "name": "check_sparsify_coefficients", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_classifier_data_not_an_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_regressor_data_not_an_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_estimators_data_not_an_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_parameters_default_constructible", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_enforce_estimator_tags_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_enforce_estimator_tags_x", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_non_transformer_estimators_n_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_transformer_n_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_get_params_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"check_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_classifiers_regression_target", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_decision_proba_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_outliers_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_fit_non_negative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_fit_idempotent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_n_features_in", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_requires_y_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_n_features_in_after_fitting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_estimator_get_tags_default_keys", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.extmath", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy import linalg", - "from scipy import sparse", - "from None import check_random_state", - "from _logistic_sigmoid import _log_logistic_sigmoid", - "from sparsefuncs_fast import csr_row_norms", - "from validation import check_array", - "from validation import _deprecate_positional_args" - ], - "classes": [], - 
"functions": [ - { - "name": "squared_norm", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Squared Euclidean or Frobenius norm of x.\n\nFaster than norm(x) ** 2.\n\nParameters\n----------\nx : array-like\n\nReturns\n-------\nfloat\n The Euclidean norm when x is a vector, the Frobenius norm when x\n is a matrix (2-d array)." - }, - { - "name": "row_norms", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input array." - }, - { - "name": "squared", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, return squared norms." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Row-wise (squared) Euclidean norm of X.\n\nEquivalent to np.sqrt((X * X).sum(axis=1)), but also supports sparse\nmatrices and does not create an X.shape-sized temporary.\n\nPerforms no input validation.\n\nParameters\n----------\nX : array-like\n The input array.\nsquared : bool, default=False\n If True, return squared norms.\n\nReturns\n-------\narray-like\n The row-wise (squared) Euclidean norm of X." - }, - { - "name": "fast_logdet", - "decorators": [], - "parameters": [ - { - "name": "A", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute log(det(A)) for A symmetric.\n\nEquivalent to : np.log(nl.det(A)) but more robust.\nIt returns -Inf if det(A) is non positive or is not defined.\n\nParameters\n----------\nA : array-like\n The matrix." 
- }, - { - "name": "density", - "decorators": [], - "parameters": [ - { - "name": "w", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sparse vector." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute density of a sparse vector.\n\nParameters\n----------\nw : array-like\n The sparse vector.\n\nReturns\n-------\nfloat\n The density of w, between 0 and 1." - }, - { - "name": "safe_sparse_dot", - "decorators": [], - "parameters": [ - { - "name": "a", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "b", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "dense_output", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When False, ``a`` and ``b`` both being sparse will yield sparse output. When True, output will always be a dense array." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Dot product that handle the sparse matrix case correctly.\n\nParameters\n----------\na : {ndarray, sparse matrix}\nb : {ndarray, sparse matrix}\ndense_output : bool, default=False\n When False, ``a`` and ``b`` both being sparse will yield sparse output.\n When True, output will always be a dense array.\n\nReturns\n-------\ndot_product : {ndarray, sparse matrix}\n Sparse if ``a`` and ``b`` are sparse and ``dense_output=False``." - }, - { - "name": "randomized_range_finder", - "decorators": [], - "parameters": [ - { - "name": "A", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data matrix." 
- }, - { - "name": "size", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Size of the return array." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of power iterations used to stabilize the result." - }, - { - "name": "power_iteration_normalizer", - "type": "Literal['auto', 'QR', 'LU', 'none']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether the power iterations are normalized with step-by-step QR factorization (the slowest but most accurate), 'none' (the fastest but numerically unstable when `n_iter` is large, e.g. typically 5 or larger), or 'LU' factorization (numerically stable but can lose slightly in accuracy). The 'auto' mode applies no normalization if `n_iter` <= 2 and switches to LU otherwise. .. versionadded:: 0.18" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator to use when shuffling the data, i.e. getting the random vectors to initialize the algorithm. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes an orthonormal matrix whose range approximates the range of A.\n\nParameters\n----------\nA : 2D array\n The input data matrix.\n\nsize : int\n Size of the return array.\n\nn_iter : int\n Number of power iterations used to stabilize the result.\n\npower_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n Whether the power iterations are normalized with step-by-step\n QR factorization (the slowest but most accurate), 'none'\n (the fastest but numerically unstable when `n_iter` is large, e.g.\n typically 5 or larger), or 'LU' factorization (numerically stable\n but can lose slightly in accuracy). The 'auto' mode applies no\n normalization if `n_iter` <= 2 and switches to LU otherwise.\n\n .. versionadded:: 0.18\n\nrandom_state : int, RandomState instance or None, default=None\n The seed of the pseudo random number generator to use when shuffling\n the data, i.e. getting the random vectors to initialize the algorithm.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nQ : ndarray\n A (size x size) projection matrix, the range of which\n approximates well the range of the input matrix A.\n\nNotes\n-----\n\nFollows Algorithm 4.3 of\nFinding structure with randomness: Stochastic algorithms for constructing\napproximate matrix decompositions\nHalko, et al., 2009 (arXiv:909) https://arxiv.org/pdf/0909.4061.pdf\n\nAn implementation of a randomized algorithm for principal component\nanalysis\nA. Szlam et al. 2014" - }, - { - "name": "randomized_svd", - "decorators": [], - "parameters": [ - { - "name": "M", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix to decompose." 
- }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of singular values and vectors to extract." - }, - { - "name": "n_oversamples", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Additional number of random vectors to sample the range of M so as to ensure proper conditioning. The total number of random vectors used to find the range of M is n_components + n_oversamples. Smaller number can improve speed but can negatively impact the quality of approximation of singular vectors and singular values." - }, - { - "name": "n_iter", - "type": "Union[Literal['auto'], int]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Number of power iterations. It can be used to deal with very noisy problems. When 'auto', it is set to 4, unless `n_components` is small (< .1 * min(X.shape)) `n_iter` in which case is set to 7. This improves precision with few components. .. versionchanged:: 0.18" - }, - { - "name": "power_iteration_normalizer", - "type": "Literal['auto', 'QR', 'LU', 'none']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether the power iterations are normalized with step-by-step QR factorization (the slowest but most accurate), 'none' (the fastest but numerically unstable when `n_iter` is large, e.g. typically 5 or larger), or 'LU' factorization (numerically stable but can lose slightly in accuracy). The 'auto' mode applies no normalization if `n_iter` <= 2 and switches to LU otherwise. .. versionadded:: 0.18" - }, - { - "name": "transpose", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether the algorithm should be applied to M.T instead of M. 
The result should approximately be the same. The 'auto' mode will trigger the transposition if M.shape[1] > M.shape[0] since this implementation of randomized SVD tend to be a little faster in that case. .. versionchanged:: 0.18" - }, - { - "name": "flip_sign", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "The output of a singular value decomposition is only unique up to a permutation of the signs of the singular vectors. If `flip_sign` is set to `True`, the sign ambiguity is resolved by making the largest loadings for each component in the left singular vectors positive." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator to use when shuffling the data, i.e. getting the random vectors to initialize the algorithm. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes a truncated randomized SVD.\n\nParameters\n----------\nM : {ndarray, sparse matrix}\n Matrix to decompose.\n\nn_components : int\n Number of singular values and vectors to extract.\n\nn_oversamples : int, default=10\n Additional number of random vectors to sample the range of M so as\n to ensure proper conditioning. The total number of random vectors\n used to find the range of M is n_components + n_oversamples. Smaller\n number can improve speed but can negatively impact the quality of\n approximation of singular vectors and singular values.\n\nn_iter : int or 'auto', default='auto'\n Number of power iterations. It can be used to deal with very noisy\n problems. When 'auto', it is set to 4, unless `n_components` is small\n (< .1 * min(X.shape)) `n_iter` in which case is set to 7.\n This improves precision with few components.\n\n .. 
versionchanged:: 0.18\n\npower_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n Whether the power iterations are normalized with step-by-step\n QR factorization (the slowest but most accurate), 'none'\n (the fastest but numerically unstable when `n_iter` is large, e.g.\n typically 5 or larger), or 'LU' factorization (numerically stable\n but can lose slightly in accuracy). The 'auto' mode applies no\n normalization if `n_iter` <= 2 and switches to LU otherwise.\n\n .. versionadded:: 0.18\n\ntranspose : bool or 'auto', default='auto'\n Whether the algorithm should be applied to M.T instead of M. The\n result should approximately be the same. The 'auto' mode will\n trigger the transposition if M.shape[1] > M.shape[0] since this\n implementation of randomized SVD tend to be a little faster in that\n case.\n\n .. versionchanged:: 0.18\n\nflip_sign : bool, default=True\n The output of a singular value decomposition is only unique up to a\n permutation of the signs of the singular vectors. If `flip_sign` is\n set to `True`, the sign ambiguity is resolved by making the largest\n loadings for each component in the left singular vectors positive.\n\nrandom_state : int, RandomState instance or None, default=0\n The seed of the pseudo random number generator to use when shuffling\n the data, i.e. getting the random vectors to initialize the algorithm.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nNotes\n-----\nThis algorithm finds a (usually very good) approximate truncated\nsingular value decomposition using randomization to speed up the\ncomputations. It is particularly fast on large matrices on which\nyou wish to extract only a small number of components. 
In order to\nobtain further speed up, `n_iter` can be set <=2 (at the cost of\nloss of precision).\n\nReferences\n----------\n* Finding structure with randomness: Stochastic algorithms for constructing\n approximate matrix decompositions\n Halko, et al., 2009 https://arxiv.org/abs/0909.4061\n\n* A randomized algorithm for the decomposition of matrices\n Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert\n\n* An implementation of a randomized algorithm for principal component\n analysis\n A. Szlam et al. 2014" - }, - { - "name": "weighted_mode", - "decorators": [], - "parameters": [ - { - "name": "a", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "n-dimensional array of which to find mode(s)." - }, - { - "name": "w", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "n-dimensional array of weights for each value." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Axis along which to operate. Default is 0, i.e. the first axis." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns an array of the weighted modal (most common) value in a.\n\nIf there is more than one such value, only the first is returned.\nThe bin-count for the modal bins is also returned.\n\nThis is an extension of the algorithm in scipy.stats.mode.\n\nParameters\n----------\na : array-like\n n-dimensional array of which to find mode(s).\nw : array-like\n n-dimensional array of weights for each value.\naxis : int, default=0\n Axis along which to operate. Default is 0, i.e. 
the first axis.\n\nReturns\n-------\nvals : ndarray\n Array of modal values.\nscore : ndarray\n Array of weighted counts for each mode.\n\nExamples\n--------\n>>> from sklearn.utils.extmath import weighted_mode\n>>> x = [4, 1, 4, 2, 4, 2]\n>>> weights = [1, 1, 1, 1, 1, 1]\n>>> weighted_mode(x, weights)\n(array([4.]), array([3.]))\n\nThe value 4 appears three times: with uniform weights, the result is\nsimply the mode of the distribution.\n\n>>> weights = [1, 3, 0.5, 1.5, 1, 2] # deweight the 4's\n>>> weighted_mode(x, weights)\n(array([2.]), array([3.5]))\n\nThe value 2 has the highest score: it appears twice with weights of\n1.5 and 2: the sum of these is 3.5.\n\nSee Also\n--------\nscipy.stats.mode" - }, - { - "name": "cartesian", - "decorators": [], - "parameters": [ - { - "name": "arrays", - "type": "List[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "1-D arrays to form the cartesian product of." - }, - { - "name": "out", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array to place the cartesian product in." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a cartesian product of input arrays.\n\nParameters\n----------\narrays : list of array-like\n 1-D arrays to form the cartesian product of.\nout : ndarray, default=None\n Array to place the cartesian product in.\n\nReturns\n-------\nout : ndarray\n 2-D array of shape (M, len(arrays)) containing cartesian products\n formed of input arrays.\n\nExamples\n--------\n>>> cartesian(([1, 2, 3], [4, 5], [6, 7]))\narray([[1, 4, 6],\n [1, 4, 7],\n [1, 5, 6],\n [1, 5, 7],\n [2, 4, 6],\n [2, 4, 7],\n [2, 5, 6],\n [2, 5, 7],\n [3, 4, 6],\n [3, 4, 7],\n [3, 5, 6],\n [3, 5, 7]])\n\nNotes\n-----\nThis function may not be used on more than 32 arrays\nbecause the underlying numpy functions do not support it." 
- }, - { - "name": "svd_flip", - "decorators": [], - "parameters": [ - { - "name": "u", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "u and v are the output of `linalg.svd` or :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner dimensions so one can compute `np.dot(u * s, v)`." - }, - { - "name": "v", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "u and v are the output of `linalg.svd` or :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner dimensions so one can compute `np.dot(u * s, v)`. The input v should really be called vt to be consistent with scipy's ouput." - }, - { - "name": "u_based_decision", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, use the columns of u as the basis for sign flipping. Otherwise, use the rows of v. The choice of which variable to base the decision on is generally algorithm dependent." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Sign correction to ensure deterministic output from SVD.\n\nAdjusts the columns of u and the rows of v such that the loadings in the\ncolumns in u that are largest in absolute value are always positive.\n\nParameters\n----------\nu : ndarray\n u and v are the output of `linalg.svd` or\n :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\n dimensions so one can compute `np.dot(u * s, v)`.\n\nv : ndarray\n u and v are the output of `linalg.svd` or\n :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\n dimensions so one can compute `np.dot(u * s, v)`.\n The input v should really be called vt to be consistent with scipy's\n ouput.\n\nu_based_decision : bool, default=True\n If True, use the columns of u as the basis for sign flipping.\n Otherwise, use the rows of v. 
The choice of which variable to base the\n decision on is generally algorithm dependent.\n\n\nReturns\n-------\nu_adjusted, v_adjusted : arrays with the same dimensions as the input." - }, - { - "name": "log_logistic", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to the logistic function." - }, - { - "name": "out", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Preallocated output array." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the log of the logistic function, ``log(1 / (1 + e ** -x))``.\n\nThis implementation is numerically stable because it splits positive and\nnegative values::\n\n -log(1 + exp(-x_i)) if x_i > 0\n x_i - log(1 + exp(x_i)) if x_i <= 0\n\nFor the ordinary logistic function, use ``scipy.special.expit``.\n\nParameters\n----------\nX : array-like of shape (M, N) or (M,)\n Argument to the logistic function.\n\nout : array-like of shape (M, N) or (M,), default=None\n Preallocated output array.\n\nReturns\n-------\nout : ndarray of shape (M, N) or (M,)\n Log of the logistic function evaluated at every point in x.\n\nNotes\n-----\nSee the blog post describing this implementation:\nhttp://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression/" - }, - { - "name": "softmax", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to the logistic function." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Copy X or not." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate the softmax function.\n\nThe softmax function is calculated by\nnp.exp(X) / np.sum(np.exp(X), axis=1)\n\nThis will cause overflow when large values are exponentiated.\nHence the largest value in each row is subtracted from each data\npoint to prevent this.\n\nParameters\n----------\nX : array-like of float of shape (M, N)\n Argument to the logistic function.\n\ncopy : bool, default=True\n Copy X or not.\n\nReturns\n-------\nout : ndarray of shape (M, N)\n Softmax function evaluated at every point in x." - }, - { - "name": "make_nonnegative", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The matrix to make non-negative." - }, - { - "name": "min_value", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The threshold value." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ensure `X.min()` >= `min_value`.\n\nParameters\n----------\nX : array-like\n The matrix to make non-negative.\nmin_value : float, default=0\n The threshold value.\n\nReturns\n-------\narray-like\n The thresholded array.\n\nRaises\n------\nValueError\n When X is sparse." - }, - { - "name": "_safe_accumulator_op", - "decorators": [], - "parameters": [ - { - "name": "op", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A numpy accumulator function such as np.mean or np.sum." - }, - { - "name": "x", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A numpy array to apply the accumulator function." 
- }, - { - "name": "*args", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Positional arguments passed to the accumulator function after the input x." - }, - { - "name": "**kwargs", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Keyword arguments passed to the accumulator function." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "This function provides numpy accumulator functions with a float64 dtype\nwhen used on a floating point input. This prevents accumulator overflow on\nsmaller floating point dtypes.\n\nParameters\n----------\nop : function\n A numpy accumulator function such as np.mean or np.sum.\nx : ndarray\n A numpy array to apply the accumulator function.\n*args : positional arguments\n Positional arguments passed to the accumulator function after the\n input x.\n**kwargs : keyword arguments\n Keyword arguments passed to the accumulator function.\n\nReturns\n-------\nresult\n The output of the accumulator function passed to this function." - }, - { - "name": "_incremental_weighted_mean_and_var", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to use for mean and variance update." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted." - }, - { - "name": "last_mean", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Mean before the incremental update." 
- }, - { - "name": "last_variance", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Variance before the incremental update. If None, variance update is not computed (in case scaling is not required)." - }, - { - "name": "last_weight_sum", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sum of weights before the incremental update." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate weighted mean and weighted variance incremental update.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to use for mean and variance update.\n\nsample_weight : array-like of shape (n_samples,) or None\n Sample weights. If None, then samples are equally weighted.\n\nlast_mean : array-like of shape (n_features,)\n Mean before the incremental update.\n\nlast_variance : array-like of shape (n_features,) or None\n Variance before the incremental update.\n If None, variance update is not computed (in case scaling is not\n required).\n\nlast_weight_sum : array-like of shape (n_features,)\n Sum of weights before the incremental update.\n\nReturns\n-------\nupdated_mean : array of shape (n_features,)\n\nupdated_variance : array of shape (n_features,) or None\n If None, only mean is computed.\n\nupdated_weight_sum : array of shape (n_features,)\n\nNotes\n-----\nNaNs in `X` are ignored.\n\n`last_mean` and `last_variance` are statistics computed at the last step\nby the function. Both must be initialized to 0.0.\nThe mean is always required (`last_mean`) and returned (`updated_mean`),\nwhereas the variance can be None (`last_variance` and `updated_variance`).\n\nFor further details on the algorithm to perform the computation in a\nnumerically stable way, see [Finch2009]_, Sections 4 and 5.\n\nReferences\n----------\n.. 
[Finch2009] `Tony Finch,\n \"Incremental calculation of weighted mean and variance\",\n University of Cambridge Computing Service, February 2009.\n `_" - }, - { - "name": "_incremental_mean_and_var", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to use for variance update." - }, - { - "name": "last_mean", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "last_variance", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "last_sample_count", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate mean update and a Youngs and Cramer variance update.\n\nlast_mean and last_variance are statistics computed at the last step by the\nfunction. Both must be initialized to 0.0. In case no scaling is required\nlast_variance can be None. The mean is always required and returned because\nnecessary for the calculation of the variance. 
last_n_samples_seen is the\nnumber of samples encountered until now.\n\nFrom the paper \"Algorithms for computing the sample variance: analysis and\nrecommendations\", by Chan, Golub, and LeVeque.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to use for variance update.\n\nlast_mean : array-like of shape (n_features,)\n\nlast_variance : array-like of shape (n_features,)\n\nlast_sample_count : array-like of shape (n_features,)\n\nReturns\n-------\nupdated_mean : ndarray of shape (n_features,)\n\nupdated_variance : ndarray of shape (n_features,)\n If None, only mean is computed.\n\nupdated_sample_count : ndarray of shape (n_features,)\n\nNotes\n-----\nNaNs are ignored during the algorithm.\n\nReferences\n----------\nT. Chan, G. Golub, R. LeVeque. Algorithms for computing the sample\n variance: recommendations, The American Statistician, Vol. 37, No. 3,\n pp. 242-247\n\nAlso, see the sparse implementation of this in\n`utils.sparsefuncs.incr_mean_variance_axis` and\n`utils.sparsefuncs_fast.incr_mean_variance_axis0`" - }, - { - "name": "_deterministic_vector_sign_flip", - "decorators": [], - "parameters": [ - { - "name": "u", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array with vectors as its rows." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Modify the sign of vectors for reproducibility.\n\nFlips the sign of elements of all the vectors (rows of u) such that\nthe absolute maximum element of each vector is positive.\n\nParameters\n----------\nu : ndarray\n Array with vectors as its rows.\n\nReturns\n-------\nu_flipped : ndarray with same shape as u\n Array with the sign flipped vectors as its rows." 
- }, - { - "name": "stable_cumsum", - "decorators": [], - "parameters": [ - { - "name": "arr", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "To be cumulatively summed as flat." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axis along which the cumulative sum is computed. The default (None) is to compute the cumsum over the flattened array." - }, - { - "name": "rtol", - "type": "float", - "hasDefault": true, - "default": "1e-05", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance, see ``np.allclose``." - }, - { - "name": "atol", - "type": "float", - "hasDefault": true, - "default": "1e-08", - "limitation": null, - "ignored": false, - "docstring": "Absolute tolerance, see ``np.allclose``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Use high precision for cumsum and check that final value matches sum.\n\nParameters\n----------\narr : array-like\n To be cumulatively summed as flat.\naxis : int, default=None\n Axis along which the cumulative sum is computed.\n The default (None) is to compute the cumsum over the flattened array.\nrtol : float, default=1e-05\n Relative tolerance, see ``np.allclose``.\natol : float, default=1e-08\n Absolute tolerance, see ``np.allclose``." 
- } - ] - }, - { - "name": "sklearn.utils.fixes", - "imports": [ - "from functools import update_wrapper", - "from distutils.version import LooseVersion", - "import functools", - "import numpy as np", - "import scipy.sparse as sp", - "import scipy", - "import scipy.stats", - "from scipy.sparse.linalg import lsqr as sparse_lsqr", - "from numpy.ma import MaskedArray as _MaskedArray", - "from _config import config_context", - "from _config import get_config", - "from deprecation import deprecated", - "from pkg_resources import parse_version", - "from scipy.sparse.linalg import lobpcg", - "from externals._lobpcg import lobpcg", - "import joblib" - ], - "classes": [ - { - "name": "loguniform", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "low", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The minimum value" - }, - { - "name": "high", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum value" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "A class supporting log-uniform random variables.\n\nParameters\n----------\nlow : float\n The minimum value\nhigh : float\n The maximum value\n\nMethods\n-------\nrvs(self, size=None, random_state=None)\n Generate log-uniform random variables\n\nThe most useful method for Scikit-learn usage is highlighted here.\nFor a full list, see\n`scipy.stats.reciprocal\n`_.\nThis list includes all functions of ``scipy.stats`` continuous\ndistributions such as ``pdf``.\n\nNotes\n-----\nThis class generates values between ``low`` and ``high`` or\n\n low <= loguniform(low, high).rvs() <= high\n\nThe logarithmic probability density function (PDF) is uniform. 
When\n``x`` is a uniformly distributed random variable between 0 and 1, ``10**x``\nare random variables that are equally likely to be returned.\n\nThis class is an alias to ``scipy.stats.reciprocal``, which uses the\nreciprocal distribution:\nhttps://en.wikipedia.org/wiki/Reciprocal_distribution\n\nExamples\n--------\n\n>>> from sklearn.utils.fixes import loguniform\n>>> rv = loguniform(1e-3, 1e1)\n>>> rvs = rv.rvs(random_state=42, size=1000)\n>>> rvs.min() # doctest: +SKIP\n0.0010435856341129003\n>>> rvs.max() # doctest: +SKIP\n9.97403052786026" - }, - { - "name": "MaskedArray", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - }, - { - "name": "_FuncWrapper", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "\"Load the global configuration before calling the function." 
- } - ], - "functions": [ - { - "name": "_object_dtype_isnan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_astype_copy_false", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the copy=False parameter for\n{ndarray, csr_matrix, csc_matrix}.astype when possible,\notherwise don't specify" - }, - { - "name": "_joblib_parallel_args", - "decorators": [], - "parameters": [ - { - "name": "prefer", - "type": "Literal['processes', 'threads']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Soft hint to choose the default backend if no specific backend was selected with the parallel_backend context manager." - }, - { - "name": "require", - "type": "Optional[Literal['sharedmem']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Hard condstraint to select the backend. If set to 'sharedmem', the selected backend will be single-host and thread-based even if the user asked for a non-thread based backend with parallel_backend." - }, - { - "name": "See joblib.Parallel documentation for more details", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set joblib.Parallel arguments in a compatible way for 0.11 and 0.12+\n\nFor joblib 0.11 this maps both ``prefer`` and ``require`` parameters to\na specific ``backend``.\n\nParameters\n----------\n\nprefer : str in {'processes', 'threads'} or None\n Soft hint to choose the default backend if no specific backend\n was selected with the parallel_backend context manager.\n\nrequire : 'sharedmem' or None\n Hard condstraint to select the backend. 
If set to 'sharedmem',\n the selected backend will be single-host and thread-based even\n if the user asked for a non-thread based backend with\n parallel_backend.\n\nSee joblib.Parallel documentation for more details" - }, - { - "name": "_take_along_axis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Implements a simplified version of np.take_along_axis if numpy\nversion < 1.15" - }, - { - "name": "delayed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decorator used to capture the arguments of a function." - } - ] - }, - { - "name": "sklearn.utils.graph", - "imports": [ - "from scipy import sparse", - "from graph_shortest_path import graph_shortest_path", - "from validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "single_source_shortest_path_length", - "decorators": [], - "parameters": [ - { - "name": "graph", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Adjacency matrix of the graph. Sparse matrix of format LIL is preferred." - }, - { - "name": "source", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Starting node for path." - }, - { - "name": "cutoff", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Depth to stop the search - only paths of length <= cutoff are returned." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the shortest path length from source to all reachable nodes.\n\nReturns a dictionary of shortest path lengths keyed by target.\n\nParameters\n----------\ngraph : {sparse matrix, ndarray} of shape (n, n)\n Adjacency matrix of the graph. 
Sparse matrix of format LIL is\n preferred.\n\nsource : int\n Starting node for path.\n\ncutoff : int, default=None\n Depth to stop the search - only paths of length <= cutoff are returned.\n\nExamples\n--------\n>>> from sklearn.utils.graph import single_source_shortest_path_length\n>>> import numpy as np\n>>> graph = np.array([[ 0, 1, 0, 0],\n... [ 1, 0, 1, 0],\n... [ 0, 1, 0, 1],\n... [ 0, 0, 1, 0]])\n>>> list(sorted(single_source_shortest_path_length(graph, 0).items()))\n[(0, 0), (1, 1), (2, 2), (3, 3)]\n>>> graph = np.ones((6, 6))\n>>> list(sorted(single_source_shortest_path_length(graph, 2).items()))\n[(0, 1), (1, 1), (2, 0), (3, 1), (4, 1), (5, 1)]" - } - ] - }, - { - "name": "sklearn.utils.metaestimators", - "imports": [ - "from typing import List", - "from typing import Any", - "from abc import ABCMeta", - "from abc import abstractmethod", - "from operator import attrgetter", - "from functools import update_wrapper", - "import numpy as np", - "from utils import _safe_indexing", - "from base import BaseEstimator", - "from base import _is_pairwise" - ], - "classes": [ - { - "name": "_BaseComposition", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_replace_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Handles parameter management for classifiers composed of named 
estimators.\n " - }, - { - "name": "_IffHasAttrDescriptor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__get__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Implements a conditional property using the descriptor protocol.\n\nUsing this class to create a decorator will raise an ``AttributeError``\nif none of the delegates (specified in ``delegate_names``) is an attribute\nof the base object or the first found delegate does not have an attribute\n``attribute_name``.\n\nThis allows ducktyping of the decorated method based on\n``delegate.attribute_name``. Here ``delegate`` is the first item in\n``delegate_names`` for which ``hasattr(object, delegate) is True``.\n\nSee https://docs.python.org/3/howto/descriptor.html for an explanation of\ndescriptors." - } - ], - "functions": [ - { - "name": "if_delegate_has_method", - "decorators": [], - "parameters": [ - { - "name": "delegate", - "type": "Union[List, str, Tuple[]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of the sub-estimator that can be accessed as an attribute of the base object. If a list or a tuple of names are provided, the first sub-estimator that is an attribute of the base object will be used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Create a decorator for methods that are delegated to a sub-estimator\n\nThis enables ducktyping by hasattr returning True according to the\nsub-estimator.\n\nParameters\n----------\ndelegate : string, list of strings or tuple of strings\n Name of the sub-estimator that can be accessed as an attribute of the\n base object. 
If a list or a tuple of names are provided, the first\n sub-estimator that is an attribute of the base object will be used." - }, - { - "name": "_safe_split", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator to determine whether we should slice only rows or rows and columns." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to be indexed. If ``estimator._pairwise is True``, this needs to be a square array-like or sparse matrix." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets to be indexed." - }, - { - "name": "indices", - "type": "Array[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Rows to select from X and y. If ``estimator._pairwise is True`` and ``train_indices is None`` then ``indices`` will also be used to slice columns." - }, - { - "name": "train_indices", - "type": "Array[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If ``estimator._pairwise is True`` and ``train_indices is not None``, then ``train_indices`` will be use to slice the columns of X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Create subset of dataset and properly handle kernels.\n\nSlice X, y according to indices for cross-validation, but take care of\nprecomputed kernel-matrices or pairwise affinities / distances.\n\nIf ``estimator._pairwise is True``, X needs to be square and\nwe slice rows and columns. If ``train_indices`` is not None,\nwe slice rows using ``indices`` (assumed the test set) and columns\nusing ``train_indices``, indicating the training set.\n\n.. 
deprecated:: 0.24\n\n The _pairwise attribute is deprecated in 0.24. From 1.1\n (renaming of 0.26) and onward, this function will check for the\n pairwise estimator tag.\n\nLabels y will always be indexed only along the first axis.\n\nParameters\n----------\nestimator : object\n Estimator to determine whether we should slice only rows or rows and\n columns.\n\nX : array-like, sparse matrix or iterable\n Data to be indexed. If ``estimator._pairwise is True``,\n this needs to be a square array-like or sparse matrix.\n\ny : array-like, sparse matrix or iterable\n Targets to be indexed.\n\nindices : array of int\n Rows to select from X and y.\n If ``estimator._pairwise is True`` and ``train_indices is None``\n then ``indices`` will also be used to slice columns.\n\ntrain_indices : array of int or None, default=None\n If ``estimator._pairwise is True`` and ``train_indices is not None``,\n then ``train_indices`` will be use to slice the columns of X.\n\nReturns\n-------\nX_subset : array-like, sparse matrix or list\n Indexed data.\n\ny_subset : array-like, sparse matrix or list\n Indexed targets." 
- } - ] - }, - { - "name": "sklearn.utils.multiclass", - "imports": [ - "from collections.abc import Sequence", - "from itertools import chain", - "import warnings", - "from scipy.sparse import issparse", - "from scipy.sparse.base import spmatrix", - "from scipy.sparse import dok_matrix", - "from scipy.sparse import lil_matrix", - "import numpy as np", - "from validation import check_array", - "from validation import _assert_all_finite" - ], - "classes": [], - "functions": [ - { - "name": "_unique_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_unique_indicator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "unique_labels", - "decorators": [], - "parameters": [ - { - "name": "*ys", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Extract an ordered array of unique labels.\n\nWe don't allow:\n - mix of multilabel and multiclass (single label) targets\n - mix of label indicator matrix and anything else,\n because there are no explicit labels)\n - mix of label indicator matrices of different sizes\n - mix of string and integer labels\n\nAt the moment, we also don't allow \"multiclass-multioutput\" input type.\n\nParameters\n----------\n*ys : array-likes\n\nReturns\n-------\nout : ndarray of shape (n_unique_labels,)\n An ordered array of unique labels.\n\nExamples\n--------\n>>> from sklearn.utils.multiclass import unique_labels\n>>> unique_labels([3, 5, 5, 5, 7, 7])\narray([3, 5, 7])\n>>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4])\narray([1, 2, 3, 4])\n>>> unique_labels([1, 2, 10], [5, 11])\narray([ 1, 2, 5, 10, 11])" - }, - { - "name": "_is_integral_float", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "is_multilabel", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if ``y`` is in a multilabel format.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n Target values.\n\nReturns\n-------\nout : bool\n Return ``True``, if ``y`` is in a multilabel format, else ```False``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.utils.multiclass import is_multilabel\n>>> is_multilabel([0, 1, 0, 1])\nFalse\n>>> is_multilabel([[1], [0, 2], []])\nFalse\n>>> is_multilabel(np.array([[1, 0], [0, 0]]))\nTrue\n>>> is_multilabel(np.array([[1], [0], [0]]))\nFalse\n>>> is_multilabel(np.array([[1, 0, 0]]))\nTrue" - }, - { - "name": "check_classification_targets", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ensure that target y is of a non-regression type.\n\nOnly the following target types (as defined in type_of_target) are allowed:\n 'binary', 'multiclass', 'multiclass-multioutput',\n 'multilabel-indicator', 'multilabel-sequences'\n\nParameters\n----------\ny : array-like" - }, - { - "name": "type_of_target", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Determine the type of data indicated by the target.\n\nNote that this type is the most specific type that can be inferred.\nFor example:\n\n * ``binary`` is more specific but compatible with ``multiclass``.\n * ``multiclass`` of 
integers is more specific but compatible with\n ``continuous``.\n * ``multilabel-indicator`` is more specific but compatible with\n ``multiclass-multioutput``.\n\nParameters\n----------\ny : array-like\n\nReturns\n-------\ntarget_type : str\n One of:\n\n * 'continuous': `y` is an array-like of floats that are not all\n integers, and is 1d or a column vector.\n * 'continuous-multioutput': `y` is a 2d array of floats that are\n not all integers, and both dimensions are of size > 1.\n * 'binary': `y` contains <= 2 discrete values and is 1d or a column\n vector.\n * 'multiclass': `y` contains more than two discrete values, is not a\n sequence of sequences, and is 1d or a column vector.\n * 'multiclass-multioutput': `y` is a 2d array that contains more\n than two discrete values, is not a sequence of sequences, and both\n dimensions are of size > 1.\n * 'multilabel-indicator': `y` is a label indicator matrix, an array\n of two dimensions with at least two columns, and at most 2 unique\n values.\n * 'unknown': `y` is array-like but none of the above, such as a 3d\n array, sequence of sequences, or an array of non-sequence objects.\n\nExamples\n--------\n>>> import numpy as np\n>>> type_of_target([0.1, 0.6])\n'continuous'\n>>> type_of_target([1, -1, -1, 1])\n'binary'\n>>> type_of_target(['a', 'b', 'a'])\n'binary'\n>>> type_of_target([1.0, 2.0])\n'binary'\n>>> type_of_target([1, 0, 2])\n'multiclass'\n>>> type_of_target([1.0, 0.0, 3.0])\n'multiclass'\n>>> type_of_target(['a', 'b', 'c'])\n'multiclass'\n>>> type_of_target(np.array([[1, 2], [3, 1]]))\n'multiclass-multioutput'\n>>> type_of_target([[1, 2]])\n'multilabel-indicator'\n>>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]]))\n'continuous-multioutput'\n>>> type_of_target(np.array([[0, 1], [1, 1]]))\n'multilabel-indicator'" - }, - { - "name": "_check_partial_fit_first_call", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private helper function for factorizing 
common classes param logic.\n\nEstimators that implement the ``partial_fit`` API need to be provided with\nthe list of possible classes at the first call to partial_fit.\n\nSubsequent calls to partial_fit should check that ``classes`` is still\nconsistent with a previous value of ``clf.classes_`` when provided.\n\nThis function returns True if it detects that this was the first call to\n``partial_fit`` on ``clf``. In that case the ``classes_`` attribute is also\nset on ``clf``." - }, - { - "name": "class_distribution", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The labels for each example." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute class priors from multioutput-multiclass target data.\n\nParameters\n----------\ny : {array-like, sparse matrix} of size (n_samples, n_outputs)\n The labels for each example.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nclasses : list of size n_outputs of ndarray of size (n_classes,)\n List of classes for each column.\n\nn_classes : list of int of size n_outputs\n Number of classes in each column.\n\nclass_prior : list of size n_outputs of ndarray of size (n_classes,)\n Class distribution of each column." - }, - { - "name": "_ovr_decision_function", - "decorators": [], - "parameters": [ - { - "name": "predictions", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted classes for each binary classifier." 
- }, - { - "name": "confidences", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Decision functions or predicted probabilities for positive class for each binary classifier." - }, - { - "name": "n_classes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of classes. n_classifiers must be ``n_classes * (n_classes - 1 ) / 2``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute a continuous, tie-breaking OvR decision function from OvO.\n\nIt is important to include a continuous value, not only votes,\nto make computing AUC or calibration meaningful.\n\nParameters\n----------\npredictions : array-like of shape (n_samples, n_classifiers)\n Predicted classes for each binary classifier.\n\nconfidences : array-like of shape (n_samples, n_classifiers)\n Decision functions or predicted probabilities for positive class\n for each binary classifier.\n\nn_classes : int\n Number of classes. n_classifiers must be\n ``n_classes * (n_classes - 1 ) / 2``." 
- } - ] - }, - { - "name": "sklearn.utils.optimize", - "imports": [ - "import numpy as np", - "import warnings", - "from scipy.optimize.linesearch import line_search_wolfe2", - "from scipy.optimize.linesearch import line_search_wolfe1", - "from exceptions import ConvergenceWarning" - ], - "classes": [ - { - "name": "_LineSearchError", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "_line_search_wolfe12", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Same as line_search_wolfe1, but fall back to line_search_wolfe2 if\nsuitable step length is not found, and raise an exception if a\nsuitable step length is not found.\n\nRaises\n------\n_LineSearchError\n If no suitable step size is found." - }, - { - "name": "_cg", - "decorators": [], - "parameters": [ - { - "name": "fhess_p", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Function that takes the gradient as a parameter and returns the matrix product of the Hessian and gradient." - }, - { - "name": "fgrad", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gradient vector." - }, - { - "name": "maxiter", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CG iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Solve iteratively the linear system 'fhess_p . 
xsupi = fgrad'\nwith a conjugate gradient descent.\n\nParameters\n----------\nfhess_p : callable\n Function that takes the gradient as a parameter and returns the\n matrix product of the Hessian and gradient.\n\nfgrad : ndarray of shape (n_features,) or (n_features + 1,)\n Gradient vector.\n\nmaxiter : int\n Number of CG iterations.\n\ntol : float\n Stopping criterion.\n\nReturns\n-------\nxsupi : ndarray of shape (n_features,) or (n_features + 1,)\n Estimated solution." - }, - { - "name": "_newton_cg", - "decorators": [], - "parameters": [ - { - "name": "grad_hess", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Should return the gradient and a callable returning the matvec product of the Hessian." - }, - { - "name": "func", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Should return the value of the function." - }, - { - "name": "grad", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Should return the function value and the gradient. This is used by the linesearch functions." - }, - { - "name": "x0", - "type": "Array[float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial guess." - }, - { - "name": "args", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Arguments passed to func_grad_hess, func and grad." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion. The iteration will stop when ``max{|g_i | i = 1, ..., n} <= tol`` where ``g_i`` is the i-th component of the gradient." 
- }, - { - "name": "maxiter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of Newton iterations." - }, - { - "name": "maxinner", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Number of CG iterations." - }, - { - "name": "line_search", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a line search or not." - }, - { - "name": "warn", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to warn when didn't converge." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Minimization of scalar function of one or more variables using the\nNewton-CG algorithm.\n\nParameters\n----------\ngrad_hess : callable\n Should return the gradient and a callable returning the matvec product\n of the Hessian.\n\nfunc : callable\n Should return the value of the function.\n\ngrad : callable\n Should return the function value and the gradient. This is used\n by the linesearch functions.\n\nx0 : array of float\n Initial guess.\n\nargs : tuple, default=()\n Arguments passed to func_grad_hess, func and grad.\n\ntol : float, default=1e-4\n Stopping criterion. The iteration will stop when\n ``max{|g_i | i = 1, ..., n} <= tol``\n where ``g_i`` is the i-th component of the gradient.\n\nmaxiter : int, default=100\n Number of Newton iterations.\n\nmaxinner : int, default=200\n Number of CG iterations.\n\nline_search : bool, default=True\n Whether to use a line search or not.\n\nwarn : bool, default=True\n Whether to warn when didn't converge.\n\nReturns\n-------\nxk : ndarray of float\n Estimated minimum." 
- }, - { - "name": "_check_optimize_result", - "decorators": [], - "parameters": [ - { - "name": "solver", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Solver name. Currently only `lbfgs` is supported." - }, - { - "name": "result", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Result of the scipy.optimize.minimize function." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Expected maximum number of iterations." - }, - { - "name": "extra_warning_msg", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Extra warning message." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the OptimizeResult for successful convergence\n\nParameters\n----------\nsolver : str\n Solver name. Currently only `lbfgs` is supported.\n\nresult : OptimizeResult\n Result of the scipy.optimize.minimize function.\n\nmax_iter : int, default=None\n Expected maximum number of iterations.\n\nextra_warning_msg : str, default=None\n Extra warning message.\n\nReturns\n-------\nn_iter : int\n Number of iterations." - } - ] - }, - { - "name": "sklearn.utils.random", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "import array", - "from None import check_random_state", - "from _random import sample_without_replacement" - ], - "classes": [], - "functions": [ - { - "name": "_random_choice_csc", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to draw in each column." 
- }, - { - "name": "classes", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of classes for each column." - }, - { - "name": "class_probability", - "type": "List", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Class distribution of each column. If None, uniform distribution is assumed." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of the sampled classes. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Generate a sparse random matrix given column class distributions\n\nParameters\n----------\nn_samples : int,\n Number of samples to draw in each column.\n\nclasses : list of size n_outputs of arrays of size (n_classes,)\n List of classes for each column.\n\nclass_probability : list of size n_outputs of arrays of shape (n_classes,), default=None\n Class distribution of each column. 
If None, uniform distribution is\n assumed.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the sampled classes.\n See :term:`Glossary `.\n\nReturns\n-------\nrandom_matrix : sparse csc matrix of size (n_samples, n_outputs)" - } - ] - }, - { - "name": "sklearn.utils.setup", - "imports": [ - "import os", - "from os.path import join", - "from sklearn._build_utils import gen_from_templates", - "import numpy", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.sparsefuncs", - "imports": [ - "import scipy.sparse as sp", - "import numpy as np", - "from validation import _deprecate_positional_args", - "from sparsefuncs_fast import csr_mean_variance_axis0 as _csr_mean_var_axis0", - "from sparsefuncs_fast import csc_mean_variance_axis0 as _csc_mean_var_axis0", - "from sparsefuncs_fast import incr_mean_variance_axis0 as _incr_mean_var_axis0", - "from utils.validation import _check_sample_weight" - ], - "classes": [], - "functions": [ - { - "name": "_raise_typeerror", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Raises a TypeError if X is not a CSR or CSC matrix" - }, - { - "name": "_raise_error_wrong_axis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inplace_csr_column_scale", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix to normalize using the variance of the features. It should be of CSR format." 
- }, - { - "name": "scale", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of precomputed feature-wise values to use for scaling." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inplace column scaling of a CSR matrix.\n\nScale each feature of the data matrix by multiplying with specific scale\nprovided by the caller assuming a (n_samples, n_features) shape.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix to normalize using the variance of the features.\n It should be of CSR format.\n\nscale : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Array of precomputed feature-wise values to use for scaling." - }, - { - "name": "inplace_csr_row_scale", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix to be scaled. It should be of CSR format." - }, - { - "name": "scale", - "type": "NDArray[float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of precomputed sample-wise values to use for scaling." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inplace row scaling of a CSR matrix.\n\nScale each sample of the data matrix by multiplying with specific scale\nprovided by the caller assuming a (n_samples, n_features) shape.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix to be scaled. It should be of CSR format.\n\nscale : ndarray of float of shape (n_samples,)\n Array of precomputed sample-wise values to use for scaling." - }, - { - "name": "mean_variance_axis", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. 
It can be of CSR or CSC format." - }, - { - "name": "axis", - "type": "Literal[0, 1]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axis along which the axis should be computed." - }, - { - "name": "weights", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "if axis is set to 0 shape is (n_samples,) or if axis is set to 1 shape is (n_features,). If it is set to None, then samples are equally weighted. .. versionadded:: 0.24" - }, - { - "name": "return_sum_weights", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns the sum of weights seen for each feature if `axis=0` or each sample if `axis=1`. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute mean and variance along an axis on a CSR or CSC matrix.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Input data. It can be of CSR or CSC format.\n\naxis : {0, 1}\n Axis along which the axis should be computed.\n\nweights : ndarray of shape (n_samples,) or (n_features,), default=None\n if axis is set to 0 shape is (n_samples,) or\n if axis is set to 1 shape is (n_features,).\n If it is set to None, then samples are equally weighted.\n\n .. versionadded:: 0.24\n\nreturn_sum_weights : bool, default=False\n If True, returns the sum of weights seen for each feature\n if `axis=0` or each sample if `axis=1`.\n\n .. versionadded:: 0.24\n\nReturns\n-------\n\nmeans : ndarray of shape (n_features,), dtype=floating\n Feature-wise means.\n\nvariances : ndarray of shape (n_features,), dtype=floating\n Feature-wise variances.\n\nsum_weights : ndarray of shape (n_features,), dtype=floating\n Returned if `return_sum_weights` is `True`." 
- }, - { - "name": "incr_mean_variance_axis", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "axis", - "type": "Literal[0, 1]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axis along which the axis should be computed." - }, - { - "name": "last_mean", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of means to update with the new data X. Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1." - }, - { - "name": "last_var", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of variances to update with the new data X. Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1." - }, - { - "name": "last_n", - "type": "Union[NDArray, float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sum of the weights seen so far, excluding the current weights If not float, it should be of shape (n_samples,) if axis=0 or (n_features,) if axis=1. If float it corresponds to having same weights for all samples (or features)." - }, - { - "name": "weights", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If axis is set to 0 shape is (n_samples,) or if axis is set to 1 shape is (n_features,). If it is set to None, then samples are equally weighted. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute incremental mean and variance along an axis on a CSR or\nCSC matrix.\n\nlast_mean, last_var are the statistics computed at the last step by this\nfunction. 
Both must be initialized to 0-arrays of the proper size, i.e.\nthe number of features in X. last_n is the number of samples encountered\nuntil now.\n\nParameters\n----------\nX : CSR or CSC sparse matrix of shape (n_samples, n_features)\n Input data.\n\naxis : {0, 1}\n Axis along which the axis should be computed.\n\nlast_mean : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Array of means to update with the new data X.\n Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.\n\nlast_var : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Array of variances to update with the new data X.\n Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.\n\nlast_n : float or ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Sum of the weights seen so far, excluding the current weights\n If not float, it should be of shape (n_samples,) if\n axis=0 or (n_features,) if axis=1. If float it corresponds to\n having same weights for all samples (or features).\n\nweights : ndarray of shape (n_samples,) or (n_features,), default=None\n If axis is set to 0 shape is (n_samples,) or\n if axis is set to 1 shape is (n_features,).\n If it is set to None, then samples are equally weighted.\n\n .. versionadded:: 0.24\n\nReturns\n-------\nmeans : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Updated feature-wise means if axis = 0 or\n sample-wise means if axis = 1.\n\nvariances : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Updated feature-wise variances if axis = 0 or\n sample-wise variances if axis = 1.\n\nn : ndarray of shape (n_features,) or (n_samples,), dtype=integral\n Updated number of seen samples per feature if axis=0\n or number of seen features per sample if axis=1.\n\n If weights is not None, n is a sum of the weights of the seen\n samples or features instead of the actual number of seen\n samples or features.\n\nNotes\n-----\nNaNs are ignored in the algorithm." 
- }, - { - "name": "inplace_column_scale", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix to normalize using the variance of the features. It should be of CSC or CSR format." - }, - { - "name": "scale", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of precomputed feature-wise values to use for scaling." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inplace column scaling of a CSC/CSR matrix.\n\nScale each feature of the data matrix by multiplying with specific scale\nprovided by the caller assuming a (n_samples, n_features) shape.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix to normalize using the variance of the features. It should be\n of CSC or CSR format.\n\nscale : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Array of precomputed feature-wise values to use for scaling." - }, - { - "name": "inplace_row_scale", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix to be scaled. It should be of CSR or CSC format." - }, - { - "name": "scale", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of precomputed sample-wise values to use for scaling." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inplace row scaling of a CSR or CSC matrix.\n\nScale each row of the data matrix by multiplying with specific scale\nprovided by the caller assuming a (n_samples, n_features) shape.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix to be scaled. 
It should be of CSR or CSC format.\n\nscale : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Array of precomputed sample-wise values to use for scaling." - }, - { - "name": "inplace_swap_row_csc", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix whose two rows are to be swapped. It should be of CSC format." - }, - { - "name": "m", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the row of X to be swapped." - }, - { - "name": "n", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the row of X to be swapped." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Swaps two rows of a CSC matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix whose two rows are to be swapped. It should be of\n CSC format.\n\nm : int\n Index of the row of X to be swapped.\n\nn : int\n Index of the row of X to be swapped." - }, - { - "name": "inplace_swap_row_csr", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix whose two rows are to be swapped. It should be of CSR format." - }, - { - "name": "m", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the row of X to be swapped." - }, - { - "name": "n", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the row of X to be swapped." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Swaps two rows of a CSR matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix whose two rows are to be swapped. It should be of\n CSR format.\n\nm : int\n Index of the row of X to be swapped.\n\nn : int\n Index of the row of X to be swapped." - }, - { - "name": "inplace_swap_row", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix whose two rows are to be swapped. It should be of CSR or CSC format." - }, - { - "name": "m", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the row of X to be swapped." - }, - { - "name": "n", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the row of X to be swapped." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Swaps two rows of a CSC/CSR matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix whose two rows are to be swapped. It should be of CSR or\n CSC format.\n\nm : int\n Index of the row of X to be swapped.\n\nn : int\n Index of the row of X to be swapped." - }, - { - "name": "inplace_swap_column", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix whose two columns are to be swapped. It should be of CSR or CSC format." - }, - { - "name": "m", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the column of X to be swapped." 
- }, - { - "name": "n", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the column of X to be swapped." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Swaps two columns of a CSC/CSR matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix whose two columns are to be swapped. It should be of\n CSR or CSC format.\n\nm : int\n Index of the column of X to be swapped.\n\nn : int\n Index of the column of X to be swapped." - }, - { - "name": "_minor_reduce", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_min_or_max_axis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sparse_min_or_max", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sparse_min_max", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sparse_nan_min_max", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "min_max_axis", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. It should be of CSR or CSC format." - }, - { - "name": "axis", - "type": "Literal[0, 1]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axis along which the axis should be computed." - }, - { - "name": "ignore_nan", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Ignore or passing through NaN values. .. 
versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute minimum and maximum along an axis on a CSR or CSC matrix and\noptionally ignore NaN values.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Input data. It should be of CSR or CSC format.\n\naxis : {0, 1}\n Axis along which the axis should be computed.\n\nignore_nan : bool, default=False\n Ignore or passing through NaN values.\n\n .. versionadded:: 0.20\n\nReturns\n-------\n\nmins : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Feature-wise minima.\n\nmaxs : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Feature-wise maxima." - }, - { - "name": "count_nonzero", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. It should be of CSR format." - }, - { - "name": "axis", - "type": "Literal[0, 1]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The axis on which the data is aggregated." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weight for each row of X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "A variant of X.getnnz() with extension to weighting on axis 0\n\nUseful in efficiently calculating multilabel metrics.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_labels)\n Input data. It should be of CSR format.\n\naxis : {0, 1}, default=None\n The axis on which the data is aggregated.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weight for each row of X." 
- }, - { - "name": "_get_median", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the median of data with n_zeros additional zeros.\n\nThis function is used to support sparse matrices; it modifies data\nin-place." - }, - { - "name": "_get_elem_at_rank", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the value in data augmented with n_zeros for the given rank" - }, - { - "name": "csc_median_axis_0", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. It should be of CSC format." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the median across axis 0 of a CSC matrix.\nIt is equivalent to doing np.median(X, axis=0).\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Input data. It should be of CSC format.\n\nReturns\n-------\nmedian : ndarray of shape (n_features,)\n Median." - } - ] - }, - { - "name": "sklearn.utils.stats", - "imports": [ - "import numpy as np", - "from extmath import stable_cumsum", - "from fixes import _take_along_axis" - ], - "classes": [], - "functions": [ - { - "name": "_weighted_percentile", - "decorators": [], - "parameters": [ - { - "name": "array", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values to take the weighted percentile of." - }, - { - "name": "sample_weight: 1D or 2D array", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights for each value in `array`. Must be same shape as `array` or of shape `(array.shape[0],)`." 
- }, - { - "name": "percentile: int", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Percentile to compute. Must be value between 0 and 100." - }, - { - "name": "default=50", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Percentile to compute. Must be value between 0 and 100." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute weighted percentile\n\nComputes lower weighted percentile. If `array` is a 2D array, the\n`percentile` is computed along the axis 0.\n\n .. versionchanged:: 0.24\n Accepts 2D `array`.\n\nParameters\n----------\narray : 1D or 2D array\n Values to take the weighted percentile of.\n\nsample_weight: 1D or 2D array\n Weights for each value in `array`. Must be same shape as `array` or\n of shape `(array.shape[0],)`.\n\npercentile: int, default=50\n Percentile to compute. Must be value between 0 and 100.\n\nReturns\n-------\npercentile : int if `array` 1D, ndarray if `array` 2D\n Weighted percentile." 
- } - ] - }, - { - "name": "sklearn.utils.validation", - "imports": [ - "from functools import wraps", - "import warnings", - "import numbers", - "import numpy as np", - "import scipy.sparse as sp", - "from inspect import signature", - "from inspect import isclass", - "from inspect import Parameter", - "from numpy.core.numeric import ComplexWarning", - "import joblib", - "from contextlib import suppress", - "from fixes import _object_dtype_isnan", - "from fixes import parse_version", - "from None import get_config as _get_config", - "from exceptions import PositiveSpectrumWarning", - "from exceptions import NotFittedError", - "from exceptions import DataConversionWarning", - "from extmath import _safe_accumulator_op", - "from pandas.api.types import is_sparse", - "from pandas import Int8Dtype", - "from pandas import Int16Dtype", - "from pandas import Int32Dtype", - "from pandas import Int64Dtype", - "from pandas import UInt8Dtype", - "from pandas import UInt16Dtype", - "from pandas import UInt32Dtype", - "from pandas import UInt64Dtype", - "from None import _safe_indexing" - ], - "classes": [], - "functions": [ - { - "name": "_deprecate_positional_args", - "decorators": [], - "parameters": [ - { - "name": "func", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Function to check arguments on." - }, - { - "name": "version", - "type": "Callable", - "hasDefault": true, - "default": "\"1", - "limitation": null, - "ignored": false, - "docstring": "The version when positional arguments will result in error." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decorator for methods that issues warnings for positional arguments.\n\nUsing the keyword-only argument syntax in pep 3102, arguments after the\n* will issue a warning when passed as a positional argument.\n\nParameters\n----------\nfunc : callable, default=None\n Function to check arguments on.\nversion : callable, default=\"1.0 (renaming of 0.25)\"\n The version when positional arguments will result in error." - }, - { - "name": "_assert_all_finite", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Like assert_all_finite, but only for ndarray." - }, - { - "name": "assert_all_finite", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "allow_nan", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Throw a ValueError if X contains NaN or infinity.\n\nParameters\n----------\nX : {ndarray, sparse matrix}\n\nallow_nan : bool, default=False" - }, - { - "name": "as_float_array", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, a copy of X will be created. If False, a copy may still be returned if X's dtype is not a floating point type." 
- }, - { - "name": "force_all_finite", - "type": "Union[Literal['allow-nan'], bool]", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise an error on np.inf, np.nan, pd.NA in X. The possibilities are: - True: Force all values of X to be finite. - False: accepts np.inf, np.nan, pd.NA in X. - 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot be infinite. .. versionadded:: 0.20 ``force_all_finite`` accepts the string ``'allow-nan'``. .. versionchanged:: 0.23 Accepts `pd.NA` and converts it into `np.nan`" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Converts an array-like to an array of floats.\n\nThe new dtype will be np.float32 or np.float64, depending on the original\ntype. The function can create a copy or modify the argument depending\non the argument copy.\n\nParameters\n----------\nX : {array-like, sparse matrix}\n\ncopy : bool, default=True\n If True, a copy of X will be created. If False, a copy may still be\n returned if X's dtype is not a floating point type.\n\nforce_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in X. The\n possibilities are:\n\n - True: Force all values of X to be finite.\n - False: accepts np.inf, np.nan, pd.NA in X.\n - 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot\n be infinite.\n\n .. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`\n\nReturns\n-------\nXT : {ndarray, sparse matrix}\n An array of type float." - }, - { - "name": "_is_arraylike", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the input is array-like." 
- }, - { - "name": "_num_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return number of samples in array-like x." - }, - { - "name": "check_memory", - "decorators": [], - "parameters": [ - { - "name": "memory", - "type": "Optional[str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Check that ``memory`` is joblib.Memory-like.\n\njoblib.Memory-like means that ``memory`` can be converted into a\njoblib.Memory instance (typically a str denoting the ``location``)\nor has the same interface (has a ``cache`` method).\n\nParameters\n----------\nmemory : None, str or object with the joblib.Memory interface\n\nReturns\n-------\nmemory : object with the joblib.Memory interface\n\nRaises\n------\nValueError\n If ``memory`` is not joblib.Memory-like." - }, - { - "name": "check_consistent_length", - "decorators": [], - "parameters": [ - { - "name": "*arrays", - "type": "Union[List, Tuple[]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Objects that will be checked for consistent length." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that all arrays have consistent first dimensions.\n\nChecks whether all objects in arrays have the same shape or length.\n\nParameters\n----------\n*arrays : list or tuple of input objects.\n Objects that will be checked for consistent length." - }, - { - "name": "_make_indexable", - "decorators": [], - "parameters": [ - { - "name": "iterable", - "type": "Union[List, NDArray]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Object to be converted to an indexable iterable." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ensure iterable supports indexing or convert to an indexable variant.\n\nConvert sparse matrices to csr and other non-indexable iterable to arrays.\nLet `None` and indexable objects (e.g. pandas dataframes) pass unchanged.\n\nParameters\n----------\niterable : {list, dataframe, ndarray, sparse matrix} or None\n Object to be converted to an indexable iterable." - }, - { - "name": "indexable", - "decorators": [], - "parameters": [ - { - "name": "*iterables", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of objects to ensure sliceability." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make arrays indexable for cross-validation.\n\nChecks consistent length, passes through None, and ensures that everything\ncan be indexed by converting sparse matrices to csr and converting\nnon-interable objects to arrays.\n\nParameters\n----------\n*iterables : {lists, dataframes, ndarrays, sparse matrices}\n List of objects to ensure sliceability." - }, - { - "name": "_ensure_sparse_format", - "decorators": [], - "parameters": [ - { - "name": "spmatrix", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input to validate and convert." - }, - { - "name": "accept_sparse", - "type": "Union[bool, str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "String[s] representing allowed sparse matrix formats ('csc', 'csr', 'coo', 'dok', 'bsr', 'lil', 'dia'). If the input is sparse but not in the allowed format, it will be converted to the first listed format. True allows the input to be any format. False means that a sparse matrix input will raise an error." 
- }, - { - "name": "dtype", - "type": "Optional[str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data type of result. If None, the dtype of the input is preserved." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether a forced copy will be triggered. If copy=False, a copy might be triggered by a conversion." - }, - { - "name": "force_all_finite", - "type": "Union[Literal['allow-nan'], bool]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise an error on np.inf, np.nan, pd.NA in X. The possibilities are: - True: Force all values of X to be finite. - False: accepts np.inf, np.nan, pd.NA in X. - 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot be infinite. .. versionadded:: 0.20 ``force_all_finite`` accepts the string ``'allow-nan'``. .. versionchanged:: 0.23 Accepts `pd.NA` and converts it into `np.nan`" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Convert a sparse matrix to a given format.\n\nChecks the sparse format of spmatrix and converts if necessary.\n\nParameters\n----------\nspmatrix : sparse matrix\n Input to validate and convert.\n\naccept_sparse : str, bool or list/tuple of str\n String[s] representing allowed sparse matrix formats ('csc',\n 'csr', 'coo', 'dok', 'bsr', 'lil', 'dia'). If the input is sparse but\n not in the allowed format, it will be converted to the first listed\n format. True allows the input to be any format. False means\n that a sparse matrix input will raise an error.\n\ndtype : str, type or None\n Data type of result. If None, the dtype of the input is preserved.\n\ncopy : bool\n Whether a forced copy will be triggered. 
If copy=False, a copy might\n be triggered by a conversion.\n\nforce_all_finite : bool or 'allow-nan'\n Whether to raise an error on np.inf, np.nan, pd.NA in X. The\n possibilities are:\n\n - True: Force all values of X to be finite.\n - False: accepts np.inf, np.nan, pd.NA in X.\n - 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot\n be infinite.\n\n .. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`\n\nReturns\n-------\nspmatrix_converted : sparse matrix.\n Matrix that is ensured to have an allowed type." - }, - { - "name": "_ensure_no_complex_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_array", - "decorators": [], - "parameters": [ - { - "name": "array", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input object to check / convert." - }, - { - "name": "accept_sparse", - "type": "Union[bool, str]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "String[s] representing allowed sparse matrix formats, such as 'csc', 'csr', etc. If the input is sparse but not in the allowed format, it will be converted to the first listed format. True allows the input to be any format. False means that a sparse matrix input will raise an error." - }, - { - "name": "accept_large_sparse", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by accept_sparse, accept_large_sparse=False will cause it to be accepted only if its indices are stored with a 32-bit dtype. .. 
versionadded:: 0.20" - }, - { - "name": "dtype", - "type": "Union[List, Literal['numeric']]", - "hasDefault": true, - "default": "'numeric'", - "limitation": null, - "ignored": false, - "docstring": "Data type of result. If None, the dtype of the input is preserved. If \"numeric\", dtype is preserved unless array.dtype is object. If dtype is a list of types, conversion on the first type is only performed if the dtype of the input is not in the list." - }, - { - "name": "order", - "type": "Literal['F', 'C']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether an array will be forced to be fortran or c-style. When order is None (default), then if copy=False, nothing is ensured about the memory layout of the output array; otherwise (copy=True) the memory layout of the returned array is kept as close as possible to the original array." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether a forced copy will be triggered. If copy=False, a copy might be triggered by a conversion." - }, - { - "name": "force_all_finite", - "type": "Union[Literal['allow-nan'], bool]", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise an error on np.inf, np.nan, pd.NA in array. The possibilities are: - True: Force all values of array to be finite. - False: accepts np.inf, np.nan, pd.NA in array. - 'allow-nan': accepts only np.nan and pd.NA values in array. Values cannot be infinite. .. versionadded:: 0.20 ``force_all_finite`` accepts the string ``'allow-nan'``. .. versionchanged:: 0.23 Accepts `pd.NA` and converts it into `np.nan`" - }, - { - "name": "ensure_2d", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise a value error if array is not 2D." 
- }, - { - "name": "allow_nd", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to allow array.ndim > 2." - }, - { - "name": "ensure_min_samples", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Make sure that the array has a minimum number of samples in its first axis (rows for a 2D array). Setting to 0 disables this check." - }, - { - "name": "ensure_min_features", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Make sure that the 2D array has some minimum number of features (columns). The default value of 1 rejects empty datasets. This check is only enforced when the input data has effectively 2 dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0 disables this check." - }, - { - "name": "estimator", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If passed, include the name of the estimator in warning messages." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Input validation on an array, list, sparse matrix or similar.\n\nBy default, the input is checked to be a non-empty 2D array containing\nonly finite values. If the dtype of the array is object, attempt\nconverting to float, raising on failure.\n\nParameters\n----------\narray : object\n Input object to check / convert.\n\naccept_sparse : str, bool or list/tuple of str, default=False\n String[s] representing allowed sparse matrix formats, such as 'csc',\n 'csr', etc. If the input is sparse but not in the allowed format,\n it will be converted to the first listed format. True allows the input\n to be any format. 
False means that a sparse matrix input will\n raise an error.\n\naccept_large_sparse : bool, default=True\n If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by\n accept_sparse, accept_large_sparse=False will cause it to be accepted\n only if its indices are stored with a 32-bit dtype.\n\n .. versionadded:: 0.20\n\ndtype : 'numeric', type, list of type or None, default='numeric'\n Data type of result. If None, the dtype of the input is preserved.\n If \"numeric\", dtype is preserved unless array.dtype is object.\n If dtype is a list of types, conversion on the first type is only\n performed if the dtype of the input is not in the list.\n\norder : {'F', 'C'} or None, default=None\n Whether an array will be forced to be fortran or c-style.\n When order is None (default), then if copy=False, nothing is ensured\n about the memory layout of the output array; otherwise (copy=True)\n the memory layout of the returned array is kept as close as possible\n to the original array.\n\ncopy : bool, default=False\n Whether a forced copy will be triggered. If copy=False, a copy might\n be triggered by a conversion.\n\nforce_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in array. The\n possibilities are:\n\n - True: Force all values of array to be finite.\n - False: accepts np.inf, np.nan, pd.NA in array.\n - 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n cannot be infinite.\n\n .. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`\n\nensure_2d : bool, default=True\n Whether to raise a value error if array is not 2D.\n\nallow_nd : bool, default=False\n Whether to allow array.ndim > 2.\n\nensure_min_samples : int, default=1\n Make sure that the array has a minimum number of samples in its first\n axis (rows for a 2D array). 
Setting to 0 disables this check.\n\nensure_min_features : int, default=1\n Make sure that the 2D array has some minimum number of features\n (columns). The default value of 1 rejects empty datasets.\n This check is only enforced when the input data has effectively 2\n dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0\n disables this check.\n\nestimator : str or estimator instance, default=None\n If passed, include the name of the estimator in warning messages.\n\nReturns\n-------\narray_converted : object\n The converted and validated array." - }, - { - "name": "_check_large_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Raise a ValueError if X has 64bit indices and accept_large_sparse=False\n " - }, - { - "name": "check_X_y", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Union[NDArray, List]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "y", - "type": "Union[NDArray, List]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Labels." - }, - { - "name": "accept_sparse", - "type": "Union[bool, str, List[str]]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "String[s] representing allowed sparse matrix formats, such as 'csc', 'csr', etc. If the input is sparse but not in the allowed format, it will be converted to the first listed format. True allows the input to be any format. False means that a sparse matrix input will raise an error." 
- }, - { - "name": "accept_large_sparse", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by accept_sparse, accept_large_sparse will cause it to be accepted only if its indices are stored with a 32-bit dtype. .. versionadded:: 0.20" - }, - { - "name": "dtype", - "type": "Union[List, Literal['numeric']]", - "hasDefault": true, - "default": "'numeric'", - "limitation": null, - "ignored": false, - "docstring": "Data type of result. If None, the dtype of the input is preserved. If \"numeric\", dtype is preserved unless array.dtype is object. If dtype is a list of types, conversion on the first type is only performed if the dtype of the input is not in the list." - }, - { - "name": "order", - "type": "Literal['F', 'C']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether an array will be forced to be fortran or c-style." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether a forced copy will be triggered. If copy=False, a copy might be triggered by a conversion." - }, - { - "name": "force_all_finite", - "type": "Union[Literal['allow-nan'], bool]", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise an error on np.inf, np.nan, pd.NA in X. This parameter does not influence whether y can have np.inf, np.nan, pd.NA values. The possibilities are: - True: Force all values of X to be finite. - False: accepts np.inf, np.nan, pd.NA in X. - 'allow-nan': accepts only np.nan or pd.NA values in X. Values cannot be infinite. .. versionadded:: 0.20 ``force_all_finite`` accepts the string ``'allow-nan'``. .. 
versionchanged:: 0.23 Accepts `pd.NA` and converts it into `np.nan`" - }, - { - "name": "ensure_2d", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise a value error if X is not 2D." - }, - { - "name": "allow_nd", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to allow X.ndim > 2." - }, - { - "name": "multi_output", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to allow 2D y (array or sparse matrix). If false, y will be validated as a vector. y cannot have np.nan or np.inf values if multi_output=True." - }, - { - "name": "ensure_min_samples", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Make sure that X has a minimum number of samples in its first axis (rows for a 2D array)." - }, - { - "name": "ensure_min_features", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Make sure that the 2D array has some minimum number of features (columns). The default value of 1 rejects empty datasets. This check is only enforced when X has effectively 2 dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0 disables this check." - }, - { - "name": "y_numeric", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to ensure that y has a numeric type. If dtype of y is object, it is converted to float64. Should only be used for regression algorithms." - }, - { - "name": "estimator", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If passed, include the name of the estimator in warning messages." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Input validation for standard estimators.\n\nChecks X and y for consistent length, enforces X to be 2D and y 1D. By\ndefault, X is checked to be non-empty and containing only finite values.\nStandard input checks are also applied to y, such as checking that y\ndoes not have np.nan or np.inf targets. For multi-label y, set\nmulti_output=True to allow 2D and sparse y. If the dtype of X is\nobject, attempt converting to float, raising on failure.\n\nParameters\n----------\nX : {ndarray, list, sparse matrix}\n Input data.\n\ny : {ndarray, list, sparse matrix}\n Labels.\n\naccept_sparse : str, bool or list of str, default=False\n String[s] representing allowed sparse matrix formats, such as 'csc',\n 'csr', etc. If the input is sparse but not in the allowed format,\n it will be converted to the first listed format. True allows the input\n to be any format. False means that a sparse matrix input will\n raise an error.\n\naccept_large_sparse : bool, default=True\n If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by\n accept_sparse, accept_large_sparse will cause it to be accepted only\n if its indices are stored with a 32-bit dtype.\n\n .. versionadded:: 0.20\n\ndtype : 'numeric', type, list of type or None, default='numeric'\n Data type of result. If None, the dtype of the input is preserved.\n If \"numeric\", dtype is preserved unless array.dtype is object.\n If dtype is a list of types, conversion on the first type is only\n performed if the dtype of the input is not in the list.\n\norder : {'F', 'C'}, default=None\n Whether an array will be forced to be fortran or c-style.\n\ncopy : bool, default=False\n Whether a forced copy will be triggered. If copy=False, a copy might\n be triggered by a conversion.\n\nforce_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in X. 
This parameter\n does not influence whether y can have np.inf, np.nan, pd.NA values.\n The possibilities are:\n\n - True: Force all values of X to be finite.\n - False: accepts np.inf, np.nan, pd.NA in X.\n - 'allow-nan': accepts only np.nan or pd.NA values in X. Values cannot\n be infinite.\n\n .. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`\n\nensure_2d : bool, default=True\n Whether to raise a value error if X is not 2D.\n\nallow_nd : bool, default=False\n Whether to allow X.ndim > 2.\n\nmulti_output : bool, default=False\n Whether to allow 2D y (array or sparse matrix). If false, y will be\n validated as a vector. y cannot have np.nan or np.inf values if\n multi_output=True.\n\nensure_min_samples : int, default=1\n Make sure that X has a minimum number of samples in its first\n axis (rows for a 2D array).\n\nensure_min_features : int, default=1\n Make sure that the 2D array has some minimum number of features\n (columns). The default value of 1 rejects empty datasets.\n This check is only enforced when X has effectively 2 dimensions or\n is originally 1D and ``ensure_2d`` is True. Setting to 0 disables\n this check.\n\ny_numeric : bool, default=False\n Whether to ensure that y has a numeric type. If dtype of y is object,\n it is converted to float64. Should only be used for regression\n algorithms.\n\nestimator : str or estimator instance, default=None\n If passed, include the name of the estimator in warning messages.\n\nReturns\n-------\nX_converted : object\n The converted and validated X.\n\ny_converted : object\n The converted and validated y." 
- }, - { - "name": "column_or_1d", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "warn", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "To control display of warnings." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ravel column or 1d numpy array, else raises an error.\n\nParameters\n----------\ny : array-like\n\nwarn : bool, default=False\n To control display of warnings.\n\nReturns\n-------\ny : ndarray" - }, - { - "name": "check_random_state", - "decorators": [], - "parameters": [ - { - "name": "seed", - "type": "Optional[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If seed is None, return the RandomState singleton used by np.random. If seed is an int, return a new RandomState instance seeded with seed. If seed is already a RandomState instance, return it. Otherwise raise ValueError." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Turn seed into a np.random.RandomState instance\n\nParameters\n----------\nseed : None, int or instance of RandomState\n If seed is None, return the RandomState singleton used by np.random.\n If seed is an int, return a new RandomState instance seeded with seed.\n If seed is already a RandomState instance, return it.\n Otherwise raise ValueError." - }, - { - "name": "has_fit_parameter", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator to inspect." - }, - { - "name": "parameter", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The searched parameter." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Checks whether the estimator's fit method supports the given parameter.\n\nParameters\n----------\nestimator : object\n An estimator to inspect.\n\nparameter : str\n The searched parameter.\n\nReturns\n-------\nis_parameter: bool\n Whether the parameter was found to be a named parameter of the\n estimator's fit method.\n\nExamples\n--------\n>>> from sklearn.svm import SVC\n>>> has_fit_parameter(SVC(), \"sample_weight\")\nTrue" - }, - { - "name": "check_symmetric", - "decorators": [], - "parameters": [ - { - "name": "array", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input object to check / convert. Must be two-dimensional and square, otherwise a ValueError will be raised." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-10", - "limitation": null, - "ignored": false, - "docstring": "Absolute tolerance for equivalence of arrays. Default = 1E-10." - }, - { - "name": "raise_warning", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True then raise a warning if conversion is required." - }, - { - "name": "raise_exception", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True then raise an exception if array is not symmetric." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure that array is 2D, square and symmetric.\n\nIf the array is not symmetric, then a symmetrized version is returned.\nOptionally, a warning or exception is raised if the matrix is not\nsymmetric.\n\nParameters\n----------\narray : {ndarray, sparse matrix}\n Input object to check / convert. 
Must be two-dimensional and square,\n otherwise a ValueError will be raised.\n\ntol : float, default=1e-10\n Absolute tolerance for equivalence of arrays. Default = 1E-10.\n\nraise_warning : bool, default=True\n If True then raise a warning if conversion is required.\n\nraise_exception : bool, default=False\n If True then raise an exception if array is not symmetric.\n\nReturns\n-------\narray_sym : {ndarray, sparse matrix}\n Symmetrized version of the input array, i.e. the average of array\n and array.transpose(). If sparse, then duplicate entries are first\n summed and zeros are eliminated." - }, - { - "name": "check_is_fitted", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "estimator instance for which the check is performed." - }, - { - "name": "attributes", - "type": "Union[List, str, Tuple[str]]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Attribute name(s) given as string or a list/tuple of strings Eg.: ``[\"coef_\", \"estimator_\", ...], \"coef_\"`` If `None`, `estimator` is considered fitted if there exist an attribute that ends with a underscore and does not start with double underscore." - }, - { - "name": "msg", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The default error message is, \"This %(name)s instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.\" For custom messages if \"%(name)s\" is present in the message string, it is substituted for the estimator name. Eg. : \"Estimator, %(name)s, must be fitted before sparsifying\"." - }, - { - "name": "all_or_any", - "type": "Any", - "hasDefault": true, - "default": "all", - "limitation": null, - "ignored": false, - "docstring": "Specify whether all or any of the given attributes must exist." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Perform is_fitted validation for estimator.\n\nChecks if the estimator is fitted by verifying the presence of\nfitted attributes (ending with a trailing underscore) and otherwise\nraises a NotFittedError with the given message.\n\nThis utility is meant to be used internally by estimators themselves,\ntypically in their own predict / transform methods.\n\nParameters\n----------\nestimator : estimator instance\n estimator instance for which the check is performed.\n\nattributes : str, list or tuple of str, default=None\n Attribute name(s) given as string or a list/tuple of strings\n Eg.: ``[\"coef_\", \"estimator_\", ...], \"coef_\"``\n\n If `None`, `estimator` is considered fitted if there exist an\n attribute that ends with a underscore and does not start with double\n underscore.\n\nmsg : str, default=None\n The default error message is, \"This %(name)s instance is not fitted\n yet. Call 'fit' with appropriate arguments before using this\n estimator.\"\n\n For custom messages if \"%(name)s\" is present in the message string,\n it is substituted for the estimator name.\n\n Eg. : \"Estimator, %(name)s, must be fitted before sparsifying\".\n\nall_or_any : callable, {all, any}, default=all\n Specify whether all or any of the given attributes must exist.\n\nReturns\n-------\nNone\n\nRaises\n------\nNotFittedError\n If the attributes are not found." - }, - { - "name": "check_non_negative", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "whom", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Who passed X to this function." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if there is any negative value in an array.\n\nParameters\n----------\nX : {array-like, sparse matrix}\n Input data.\n\nwhom : str\n Who passed X to this function." - }, - { - "name": "check_scalar", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The scalar parameter to validate." - }, - { - "name": "name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The name of the parameter to be printed in error messages." - }, - { - "name": "target_type", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Acceptable data types for the parameter." - }, - { - "name": "min_val", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The minimum valid value the parameter can take. If None (default) it is implied that the parameter does not have a lower bound." - }, - { - "name": "max_val", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum valid value the parameter can take. If None (default) it is implied that the parameter does not have an upper bound." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate scalar parameters type and value.\n\nParameters\n----------\nx : object\n The scalar parameter to validate.\n\nname : str\n The name of the parameter to be printed in error messages.\n\ntarget_type : type or tuple\n Acceptable data types for the parameter.\n\nmin_val : float or int, default=None\n The minimum valid value the parameter can take. 
If None (default) it\n is implied that the parameter does not have a lower bound.\n\nmax_val : float or int, default=None\n The maximum valid value the parameter can take. If None (default) it\n is implied that the parameter does not have an upper bound.\n\nRaises\n-------\nTypeError\n If the parameter's type does not match the desired type.\n\nValueError\n If the parameter's value violates the given bounds." - }, - { - "name": "_check_psd_eigenvalues", - "decorators": [], - "parameters": [ - { - "name": "lambdas", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of eigenvalues to check / fix." - }, - { - "name": "enable_warnings", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When this is set to ``True``, a ``PositiveSpectrumWarning`` will be raised when there are imaginary parts, negative eigenvalues, or extremely small non-zero eigenvalues. Otherwise no warning will be raised. In both cases, imaginary parts, negative eigenvalues, and extremely small non-zero eigenvalues will be set to zero." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the eigenvalues of a positive semidefinite (PSD) matrix.\n\nChecks the provided array of PSD matrix eigenvalues for numerical or\nconditioning issues and returns a fixed validated version. This method\nshould typically be used if the PSD matrix is user-provided (e.g. a\nGram matrix) or computed using a user-provided dissimilarity metric\n(e.g. kernel function), or if the decomposition process uses approximation\nmethods (randomized SVD, etc.).\n\nIt checks for three things:\n\n- that there are no significant imaginary parts in eigenvalues (more than\n 1e-5 times the maximum real part). If this check fails, it raises a\n ``ValueError``. Otherwise all non-significant imaginary parts that may\n remain are set to zero. 
This operation is traced with a\n ``PositiveSpectrumWarning`` when ``enable_warnings=True``.\n\n- that eigenvalues are not all negative. If this check fails, it raises a\n ``ValueError``\n\n- that there are no significant negative eigenvalues with absolute value\n more than 1e-10 (1e-6) and more than 1e-5 (5e-3) times the largest\n positive eigenvalue in double (simple) precision. If this check fails,\n it raises a ``ValueError``. Otherwise all negative eigenvalues that may\n remain are set to zero. This operation is traced with a\n ``PositiveSpectrumWarning`` when ``enable_warnings=True``.\n\nFinally, all the positive eigenvalues that are too small (with a value\nsmaller than the maximum eigenvalue multiplied by 1e-12 (2e-7)) are set to\nzero. This operation is traced with a ``PositiveSpectrumWarning`` when\n``enable_warnings=True``.\n\nParameters\n----------\nlambdas : array-like of shape (n_eigenvalues,)\n Array of eigenvalues to check / fix.\n\nenable_warnings : bool, default=False\n When this is set to ``True``, a ``PositiveSpectrumWarning`` will be\n raised when there are imaginary parts, negative eigenvalues, or\n extremely small non-zero eigenvalues. Otherwise no warning will be\n raised. In both cases, imaginary parts, negative eigenvalues, and\n extremely small non-zero eigenvalues will be set to zero.\n\nReturns\n-------\nlambdas_fixed : ndarray of shape (n_eigenvalues,)\n A fixed validated copy of the array of eigenvalues.\n\nExamples\n--------\n>>> _check_psd_eigenvalues([1, 2]) # nominal case\narray([1, 2])\n>>> _check_psd_eigenvalues([5, 5j]) # significant imag part\nTraceback (most recent call last):\n ...\nValueError: There are significant imaginary parts in eigenvalues (1\n of the maximum real part). 
Either the matrix is not PSD, or there was\n an issue while computing the eigendecomposition of the matrix.\n>>> _check_psd_eigenvalues([5, 5e-5j]) # insignificant imag part\narray([5., 0.])\n>>> _check_psd_eigenvalues([-5, -1]) # all negative\nTraceback (most recent call last):\n ...\nValueError: All eigenvalues are negative (maximum is -1). Either the\n matrix is not PSD, or there was an issue while computing the\n eigendecomposition of the matrix.\n>>> _check_psd_eigenvalues([5, -1]) # significant negative\nTraceback (most recent call last):\n ...\nValueError: There are significant negative eigenvalues (0.2 of the\n maximum positive). Either the matrix is not PSD, or there was an issue\n while computing the eigendecomposition of the matrix.\n>>> _check_psd_eigenvalues([5, -5e-5]) # insignificant negative\narray([5., 0.])\n>>> _check_psd_eigenvalues([5, 4e-12]) # bad conditioning (too small)\narray([5., 0.])" - }, - { - "name": "_check_sample_weight", - "decorators": [], - "parameters": [ - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input sample weights." - }, - { - "name": "X", - "type": "Union[NDArray, List]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "dtype: dtype", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "dtype of the validated `sample_weight`. If None, and the input `sample_weight` is an array, the dtype of the input is preserved; otherwise an array with the default numpy dtype is be allocated. If `dtype` is not one of `float32`, `float64`, `None`, the output will be of dtype `float64`." - }, - { - "name": "default=None", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "dtype of the validated `sample_weight`. 
If None, and the input `sample_weight` is an array, the dtype of the input is preserved; otherwise an array with the default numpy dtype is be allocated. If `dtype` is not one of `float32`, `float64`, `None`, the output will be of dtype `float64`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate sample weights.\n\nNote that passing sample_weight=None will output an array of ones.\nTherefore, in some cases, you may want to protect the call with:\nif sample_weight is not None:\n sample_weight = _check_sample_weight(...)\n\nParameters\n----------\nsample_weight : {ndarray, Number or None}, shape (n_samples,)\n Input sample weights.\n\nX : {ndarray, list, sparse matrix}\n Input data.\n\ndtype: dtype, default=None\n dtype of the validated `sample_weight`.\n If None, and the input `sample_weight` is an array, the dtype of the\n input is preserved; otherwise an array with the default numpy dtype\n is be allocated. If `dtype` is not one of `float32`, `float64`,\n `None`, the output will be of dtype `float64`.\n\nReturns\n-------\nsample_weight : ndarray of shape (n_samples,)\n Validated sample weight. It is guaranteed to be \"C\" contiguous." - }, - { - "name": "_allclose_dense_sparse", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "First array to compare." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Second array to compare." - }, - { - "name": "rtol", - "type": "float", - "hasDefault": true, - "default": "1e-7", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance; see numpy.allclose." - }, - { - "name": "atol", - "type": "float", - "hasDefault": true, - "default": "1e-9", - "limitation": null, - "ignored": false, - "docstring": "absolute tolerance; see numpy.allclose. 
Note that the default here is more tolerant than the default for numpy.testing.assert_allclose, where atol=0." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check allclose for sparse and dense data.\n\nBoth x and y need to be either sparse or dense, they\ncan't be mixed.\n\nParameters\n----------\nx : {array-like, sparse matrix}\n First array to compare.\n\ny : {array-like, sparse matrix}\n Second array to compare.\n\nrtol : float, default=1e-7\n Relative tolerance; see numpy.allclose.\n\natol : float, default=1e-9\n absolute tolerance; see numpy.allclose. Note that the default here is\n more tolerant than the default for numpy.testing.assert_allclose, where\n atol=0." - }, - { - "name": "_check_fit_params", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data array." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary containing the parameters passed at fit." - }, - { - "name": "indices", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices to be selected if the parameter has the same size as `X`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check and validate the parameters passed during `fit`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data array.\n\nfit_params : dict\n Dictionary containing the parameters passed at fit.\n\nindices : array-like of shape (n_samples,), default=None\n Indices to be selected if the parameter has the same size as `X`.\n\nReturns\n-------\nfit_params_validated : dict\n Validated parameters. We ensure that the values support indexing." 
- } - ] - }, - { - "name": "sklearn.utils._arpack", - "imports": [ - "from validation import check_random_state" - ], - "classes": [], - "functions": [ - { - "name": "_init_arpack_v0", - "decorators": [], - "parameters": [ - { - "name": "size", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The size of the eigenvalue vector to be initialized." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator used to generate a uniform distribution. If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialize the starting vector for iteration in ARPACK functions.\n\nInitialize a ndarray with values sampled from the uniform distribution on\n[-1, 1]. This initialization model has been chosen to be consistent with\nthe ARPACK one as another initialization can lead to convergence issues.\n\nParameters\n----------\nsize : int\n The size of the eigenvalue vector to be initialized.\n\nrandom_state : int, RandomState instance or None, default=None\n The seed of the pseudo random number generator used to generate a\n uniform distribution. If int, random_state is the seed used by the\n random number generator; If RandomState instance, random_state is the\n random number generator; If None, the random number generator is the\n RandomState instance used by `np.random`.\n\nReturns\n-------\nv0 : ndarray of shape (size,)\n The initialized vector." 
- } - ] - }, - { - "name": "sklearn.utils._encode", - "imports": [ - "from typing import NamedTuple", - "import numpy as np", - "from None import is_scalar_nan" - ], - "classes": [ - { - "name": "MissingValues", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "to_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convert tuple to a list where None is always first." - } - ], - "docstring": "Data class for missing data information" - }, - { - "name": "_nandict", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__missing__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dictionary with support for nans." - } - ], - "functions": [ - { - "name": "_unique", - "decorators": [], - "parameters": [ - { - "name": "values", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values to check for unknowns." - }, - { - "name": "return_inverse", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, also return the indices of the unique values." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper function to find unique values with support for python objects.\n\nUses pure python method for object dtype, and numpy method for\nall other dtypes.\n\nParameters\n----------\nvalues : ndarray\n Values to check for unknowns.\n\nreturn_inverse : bool, default=False\n If True, also return the indices of the unique values.\n\nReturns\n-------\nunique : ndarray\n The sorted unique values.\n\nunique_inverse : ndarray\n The indices to reconstruct the original array from the unique array.\n Only provided if `return_inverse` is True." - }, - { - "name": "_extract_missing", - "decorators": [], - "parameters": [ - { - "name": "values: set", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Set of values to extract missing from." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Extract missing values from `values`.\n\nParameters\n----------\nvalues: set\n Set of values to extract missing from.\n\nReturns\n-------\noutput: set\n Set with missing values extracted.\n\nmissing_values: MissingValues\n Object with missing value information." - }, - { - "name": "_map_to_integer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Map values based on its position in uniques." - }, - { - "name": "_unique_python", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_encode", - "decorators": [], - "parameters": [ - { - "name": "values", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values to encode." - }, - { - "name": "uniques", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The unique values in `values`. 
If the dtype is not object, then `uniques` needs to be sorted." - }, - { - "name": "check_unknown", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, check for values in `values` that are not in `unique` and raise an error. This is ignored for object dtype, and treated as True in this case. This parameter is useful for _BaseEncoder._transform() to avoid calling _check_unknown() twice." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper function to encode values into [0, n_uniques - 1].\n\nUses pure python method for object dtype, and numpy method for\nall other dtypes.\nThe numpy method has the limitation that the `uniques` need to\nbe sorted. Importantly, this is not checked but assumed to already be\nthe case. The calling method needs to ensure this for all non-object\nvalues.\n\nParameters\n----------\nvalues : ndarray\n Values to encode.\nuniques : ndarray\n The unique values in `values`. If the dtype is not object, then\n `uniques` needs to be sorted.\ncheck_unknown : bool, default=True\n If True, check for values in `values` that are not in `unique`\n and raise an error. This is ignored for object dtype, and treated as\n True in this case. This parameter is useful for\n _BaseEncoder._transform() to avoid calling _check_unknown()\n twice.\n\nReturns\n-------\nencoded : ndarray\n Encoded values" - }, - { - "name": "_check_unknown", - "decorators": [], - "parameters": [ - { - "name": "values", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values to check for unknowns." - }, - { - "name": "known_values", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Known values. Must be unique." 
- }, - { - "name": "return_mask", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, return a mask of the same shape as `values` indicating the valid values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper function to check for unknowns in values to be encoded.\n\nUses pure python method for object dtype, and numpy method for\nall other dtypes.\n\nParameters\n----------\nvalues : array\n Values to check for unknowns.\nknown_values : array\n Known values. Must be unique.\nreturn_mask : bool, default=False\n If True, return a mask of the same shape as `values` indicating\n the valid values.\n\nReturns\n-------\ndiff : list\n The unique values present in `values` and not in `know_values`.\nvalid_mask : boolean array\n Additionally returned if ``return_mask=True``." - } - ] - }, - { - "name": "sklearn.utils._estimator_html_repr", - "imports": [ - "from contextlib import closing", - "from contextlib import suppress", - "from io import StringIO", - "import uuid", - "import html", - "from sklearn import config_context" - ], - "classes": [ - { - "name": "_VisualBlock", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kind", - "type": "Literal['serial', 'parallel', 'single']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "kind of HTML block" - }, - { - "name": "estimators", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If kind != 'single', then `estimators` is a list of estimators. If kind == 'single', then `estimators` is a single estimator." 
- }, - { - "name": "names", - "type": "List[str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If kind != 'single', then `names` corresponds to estimators. If kind == 'single', then `names` is a single string corresponding to the single estimator." - }, - { - "name": "name_details", - "type": "Union[List[str], str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If kind != 'single', then `name_details` corresponds to `names`. If kind == 'single', then `name_details` is a single string corresponding to the single estimator." - }, - { - "name": "dash_wrapped", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If true, wrapped HTML element will be wrapped with a dashed border. Only active when kind != 'single'." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sk_visual_block_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "HTML Representation of Estimator\n\nParameters\n----------\nkind : {'serial', 'parallel', 'single'}\n kind of HTML block\n\nestimators : list of estimators or `_VisualBlock`s or a single estimator\n If kind != 'single', then `estimators` is a list of\n estimators.\n If kind == 'single', then `estimators` is a single estimator.\n\nnames : list of str, default=None\n If kind != 'single', then `names` corresponds to estimators.\n If kind == 'single', then `names` is a single string corresponding to\n the single estimator.\n\nname_details : list of str, str, or None, default=None\n If kind != 'single', then `name_details` corresponds to `names`.\n If kind == 'single', then `name_details` is a single string\n corresponding to the single estimator.\n\ndash_wrapped : bool, default=True\n If true, wrapped HTML element will be wrapped with a 
dashed border.\n Only active when kind != 'single'." - } - ], - "functions": [ - { - "name": "_write_label_html", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Write labeled html with or without a dropdown with named details" - }, - { - "name": "_get_visual_block", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate information about how to display an estimator.\n " - }, - { - "name": "_write_estimator_html", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Write estimator to html in serial, parallel, or by itself (single).\n " - }, - { - "name": "estimator_html_repr", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator to visualize." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a HTML representation of an estimator.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object\n The estimator to visualize.\n\nReturns\n-------\nhtml: str\n HTML representation of estimator." 
- } - ] - }, - { - "name": "sklearn.utils._joblib", - "imports": [ - "import warnings as _warnings", - "import joblib", - "from joblib import logger", - "from joblib import dump", - "from joblib import load", - "from joblib import __version__", - "from joblib import effective_n_jobs", - "from joblib import hash", - "from joblib import cpu_count", - "from joblib import Parallel", - "from joblib import Memory", - "from joblib import delayed", - "from joblib import parallel_backend", - "from joblib import register_parallel_backend" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.utils._mask", - "imports": [ - "import numpy as np", - "from scipy import sparse as sp", - "from None import is_scalar_nan", - "from fixes import _object_dtype_isnan" - ], - "classes": [], - "functions": [ - { - "name": "_get_dense_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_mask", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where ``n_samples`` is the number of samples and ``n_features`` is the number of features." - }, - { - "name": "value_to_mask", - "type": "Union[float, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The value which is to be masked in X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the boolean mask X == value_to_mask.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n\nvalue_to_mask : {int, float}\n The value which is to be masked in X.\n\nReturns\n-------\nX_mask : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Missing mask." 
- } - ] - }, - { - "name": "sklearn.utils._mocking", - "imports": [ - "import numpy as np", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from validation import _num_samples", - "from validation import check_array", - "from validation import check_is_fitted" - ], - "classes": [ - { - "name": "ArraySlicingWrapper", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "array", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__getitem__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Parameters\n----------\narray" - }, - { - "name": "MockDataFrame", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "array", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__len__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__array__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__eq__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__ne__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Parameters\n----------\narray" - }, - { - "name": "CheckingClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": 
"__init__", - "decorators": [], - "parameters": [ - { - "name": "check_y", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The callable used to validate `X` and `y`. These callable should return a bool where `False` will trigger an `AssertionError`." - }, - { - "name": "check_X", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The callable used to validate `X` and `y`. These callable should return a bool where `False` will trigger an `AssertionError`." - }, - { - "name": "check_y_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The optional parameters to pass to `check_X` and `check_y`." - }, - { - "name": "check_X_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The optional parameters to pass to `check_X` and `check_y`." - }, - { - "name": "methods_to_check", - "type": "Union[List[str], Literal[\"all\"]]", - "hasDefault": true, - "default": "\"all\"", - "limitation": null, - "ignored": false, - "docstring": "The methods in which the checks should be applied. By default, all checks will be done on all methods (`fit`, `predict`, `predict_proba`, `decision_function` and `score`)." - }, - { - "name": "foo_param", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A `foo` param. When `foo > 1`, the output of :meth:`score` will be 1 otherwise it is 0." - }, - { - "name": "expected_fit_params", - "type": "List[str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A list of the expected parameters given when calling `fit`." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_X_y", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data set." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The corresponding target, by default None." - }, - { - "name": "should_be_fitted", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not the classifier should be already fitted. By default True." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Validate X and y and make extra check.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data set.\ny : array-like of shape (n_samples), default=None\n The corresponding target, by default None.\nshould_be_fitted : bool, default=True\n Whether or not the classifier should be already fitted.\n By default True.\n\nReturns\n-------\nX, y" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target relative to X for classification or regression; None for unsupervised learning." 
- }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``fit`` method of the estimator" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit classifier.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples, n_outputs) or (n_samples,), default=None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of the estimator\n\nReturns\n-------\nself" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the first class seen in `classes_`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\npreds : ndarray of shape (n_samples,)\n Predictions of the first class seens in `classes_`." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict probabilities for each class.\n\nHere, the dummy classifier will provide a probability of 1 for the\nfirst class of `classes_` and 0 otherwise.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\nproba : ndarray of shape (n_samples, n_classes)\n The probabilities for each sample and class." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Confidence score.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\ndecision : ndarray of shape (n_samples,) if n_classes == 2 else (n_samples, n_classes)\n Confidence score." - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target relative to X for classification or regression; None for unsupervised learning." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fake score.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data, where n_samples is the number of samples and\n n_features is the number of features.\n\nY : array-like of shape (n_samples, n_output) or (n_samples,)\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\nReturns\n-------\nscore : float\n Either 0 or 1 depending of `foo_param` (i.e. `foo_param > 1 =>\n score=1` otherwise `score=0`)." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier to test pipelining and meta-estimators.\n\nChecks some property of `X` and `y`in fit / predict.\nThis allows testing whether pipelines / cross-validation or metaestimators\nchanged the input.\n\nCan also be used to check if `fit_params` are passed correctly, and\nto force a certain score to be returned.\n\nParameters\n----------\ncheck_y, check_X : callable, default=None\n The callable used to validate `X` and `y`. These callable should return\n a bool where `False` will trigger an `AssertionError`.\n\ncheck_y_params, check_X_params : dict, default=None\n The optional parameters to pass to `check_X` and `check_y`.\n\nmethods_to_check : \"all\" or list of str, default=\"all\"\n The methods in which the checks should be applied. By default,\n all checks will be done on all methods (`fit`, `predict`,\n `predict_proba`, `decision_function` and `score`).\n\nfoo_param : int, default=0\n A `foo` param. 
When `foo > 1`, the output of :meth:`score` will be 1\n otherwise it is 0.\n\nexpected_fit_params : list of str, default=None\n A list of the expected parameters given when calling `fit`.\n\nAttributes\n----------\nclasses_ : int\n The classes seen during `fit`.\n\nn_features_in_ : int\n The number of features seen during `fit`.\n\nExamples\n--------\n>>> from sklearn.utils._mocking import CheckingClassifier\n\nThis helper allow to assert to specificities regarding `X` or `y`. In this\ncase we expect `check_X` or `check_y` to return a boolean.\n\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = CheckingClassifier(check_X=lambda x: x.shape == (150, 4))\n>>> clf.fit(X, y)\nCheckingClassifier(...)\n\nWe can also provide a check which might raise an error. In this case, we\nexpect `check_X` to return `X` and `check_y` to return `y`.\n\n>>> from sklearn.utils import check_array\n>>> clf = CheckingClassifier(check_X=check_array)\n>>> clf.fit(X, y)\nCheckingClassifier(...)" - }, - { - "name": "NoSampleWeightWrapper", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "est", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator to wrap." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Wrap estimator which will not expose `sample_weight`.\n\nParameters\n----------\nest : estimator, default=None\n The estimator to wrap." - } - ], - "functions": [] - }, - { - "name": "sklearn.utils._pprint", - "imports": [ - "import inspect", - "import pprint", - "from collections import OrderedDict", - "from base import BaseEstimator", - "from _config import get_config", - "from None import is_scalar_nan" - ], - "classes": [ - { - "name": "KeyValTuple", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy class for correctly rendering key-value tuples from dicts." - }, - { - "name": "KeyValTupleParam", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Dummy class for correctly rendering key-value tuples from parameters." 
- }, - { - "name": "_EstimatorPrettyPrinter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pprint_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_format_dict_items", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_format_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_format_params_or_dict_items", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Format dict items or parameters respecting the compact=True\nparameter. For some reason, the builtin rendering of dict items doesn't\nrespect compact=True and will use one line per key-value if all cannot\nfit in a single line.\nDict items will be rendered as <'key': value> while params will be\nrendered as . The implementation is mostly copy/pasting from\nthe builtin _format_items().\nThis also adds ellipsis if the number of items is greater than\nself.n_max_elements_to_show." - }, - { - "name": "_format_items", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Format the items of an iterable (list, tuple...). Same as the\nbuilt-in _format_items, with support for ellipsis if the number of\nelements is greater than self.n_max_elements_to_show." - }, - { - "name": "_pprint_key_val_tuple", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Pretty printing for key-value tuples from dict or parameters." 
- } - ], - "docstring": "Pretty Printer class for estimator objects.\n\nThis extends the pprint.PrettyPrinter class, because:\n- we need estimators to be printed with their parameters, e.g.\n Estimator(param1=value1, ...) which is not supported by default.\n- the 'compact' parameter of PrettyPrinter is ignored for dicts, which\n may lead to very long representations that we want to avoid.\n\nQuick overview of pprint.PrettyPrinter (see also\nhttps://stackoverflow.com/questions/49565047/pprint-with-hex-numbers):\n\n- the entry point is the _format() method which calls format() (overridden\n here)\n- format() directly calls _safe_repr() for a first try at rendering the\n object\n- _safe_repr formats the whole object reccursively, only calling itself,\n not caring about line length or anything\n- back to _format(), if the output string is too long, _format() then calls\n the appropriate _pprint_TYPE() method (e.g. _pprint_list()) depending on\n the type of the object. This where the line length and the compact\n parameters are taken into account.\n- those _pprint_TYPE() methods will internally use the format() method for\n rendering the nested objects of an object (e.g. the elements of a list)\n\nIn the end, everything has to be implemented twice: in _safe_repr and in\nthe custom _pprint_TYPE methods. 
Unfortunately PrettyPrinter is really not\nstraightforward to extend (especially when we want a compact output), so\nthe code is a bit convoluted.\n\nThis class overrides:\n- format() to support the changed_only parameter\n- _safe_repr to support printing of estimators (for when they fit on a\n single line)\n- _format_dict_items so that dict are correctly 'compacted'\n- _format_items so that ellipsis is used on long lists and tuples\n\nWhen estimators cannot be printed on a single line, the builtin _format()\nwill call _pprint_estimator() because it was registered to do so (see\n_dispatch[BaseEstimator.__repr__] = _pprint_estimator).\n\nboth _format_dict_items() and _pprint_estimator() use the\n_format_params_or_dict_items() method that will format parameters and\nkey-value pairs respecting the compact parameter. This method needs another\nsubroutine _pprint_key_val_tuple() used when a parameter or a key-value\npair is too long to fit on a single line. This subroutine is called in\n_format() and is registered as well in the _dispatch dict (just like\n_pprint_estimator). We had to create the two classes KeyValTuple and\nKeyValTupleParam for this." - } - ], - "functions": [ - { - "name": "_changed_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return dict (param_name: value) of parameters that were given to\nestimator with non-default values." - }, - { - "name": "_safe_repr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Same as the builtin _safe_repr, with added support for Estimator\nobjects." 
- } - ] - }, - { - "name": "sklearn.utils._show_versions", - "imports": [ - "import platform", - "import sys", - "import importlib", - "from _openmp_helpers import _openmp_parallelism_enabled" - ], - "classes": [], - "functions": [ - { - "name": "_get_sys_info", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "System information\n\nReturns\n-------\nsys_info : dict\n system and Python version information" - }, - { - "name": "_get_deps_info", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Overview of the installed version of main dependencies\n\nReturns\n-------\ndeps_info: dict\n version information on relevant Python libraries" - }, - { - "name": "show_versions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Print useful debugging information\"\n\n.. versionadded:: 0.20" - } - ] - }, - { - "name": "sklearn.utils._tags", - "imports": [ - "import numpy as np" - ], - "classes": [], - "functions": [ - { - "name": "_safe_tags", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator from which to get the tag." - }, - { - "name": "key", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Tag name to get. By default (`None`), all tags are returned." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Safely get estimator tags.\n\n:class:`~sklearn.BaseEstimator` provides the estimator tags machinery.\nHowever, if an estimator does not inherit from this base class, we should\nfall-back to the default tags.\n\nFor scikit-learn built-in estimators, we should still rely on\n`self._get_tags()`. 
`_safe_tags(est)` should be used when we are not sure\nwhere `est` comes from: typically `_safe_tags(self.base_estimator)` where\n`self` is a meta-estimator, or in the common checks.\n\nParameters\n----------\nestimator : estimator object\n The estimator from which to get the tag.\n\nkey : str, default=None\n Tag name to get. By default (`None`), all tags are returned.\n\nReturns\n-------\ntags : dict or tag value\n The estimator tags. A single value is returned if `key` is not None." - } - ] - }, - { - "name": "sklearn.utils._testing", - "imports": [ - "import os", - "import os.path as op", - "import inspect", - "import warnings", - "import sys", - "import functools", - "import tempfile", - "from subprocess import check_output", - "from subprocess import STDOUT", - "from subprocess import CalledProcessError", - "from subprocess import TimeoutExpired", - "import re", - "import contextlib", - "from collections.abc import Iterable", - "import scipy as sp", - "from functools import wraps", - "from inspect import signature", - "import shutil", - "import atexit", - "import unittest", - "from unittest import TestCase", - "from numpy.testing import assert_allclose", - "from numpy.testing import assert_almost_equal", - "from numpy.testing import assert_approx_equal", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_less", - "import numpy as np", - "import joblib", - "import sklearn", - "from sklearn.utils import IS_PYPY", - "from sklearn.utils import _IS_32BIT", - "from sklearn.utils.multiclass import check_classification_targets", - "from sklearn.utils.validation import check_array", - "from sklearn.utils.validation import check_is_fitted", - "from sklearn.utils.validation import check_X_y", - "import pytest", - "from numpydoc import docscrape", - "import difflib", - "import pprint", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import r2_score" - 
], - "classes": [ - { - "name": "_IgnoreWarnings", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "category", - "type": "Tuple[]", - "hasDefault": true, - "default": "Warning", - "limitation": null, - "ignored": false, - "docstring": "The category to filter. By default, all the categories will be muted." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decorator to catch and hide warnings without visual nesting." - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__enter__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__exit__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Improved and simplified Python warnings context manager and decorator.\n\nThis class allows the user to ignore the warnings raised by a function.\nCopied from Python 2.7.5 and modified as required.\n\nParameters\n----------\ncategory : tuple of warning class, default=Warning\n The category to filter. By default, all the categories will be muted." 
- }, - { - "name": "TempMemmap", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "data", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "mmap_mode", - "type": "str", - "hasDefault": true, - "default": "'r'", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__enter__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__exit__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Parameters\n----------\ndata\nmmap_mode : str, default='r'" - }, - { - "name": "_Raises", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__exit__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "MinimalClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Minimal classifier implementation with inheriting from BaseEstimator.\n\nThis estimator should be tested with:\n\n* `check_estimator` in `test_estimator_checks.py`;\n* within a `Pipeline` in `test_pipeline.py`;\n* within a `SearchCV` in `test_search.py`." - }, - { - "name": "MinimalRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Minimal regressor implementation with inheriting from BaseEstimator.\n\nThis estimator should be tested with:\n\n* `check_estimator` in `test_estimator_checks.py`;\n* within a `Pipeline` in `test_pipeline.py`;\n* within a `SearchCV` in `test_search.py`." 
- }, - { - "name": "MinimalTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Minimal transformer implementation with inheriting from\nBaseEstimator.\n\nThis estimator should be tested with:\n\n* `check_estimator` in `test_estimator_checks.py`;\n* within a `Pipeline` in `test_pipeline.py`;\n* within a `SearchCV` in `test_search.py`." - } - ], - "functions": [ - { - "name": "assert_warns", - "decorators": [], - "parameters": [ - { - "name": "warning_class", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class to test for, e.g. UserWarning." - }, - { - "name": "func", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Callable object to trigger warnings." 
- }, - { - "name": "*args", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "**kw", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Test that a certain warning occurs.\n\nParameters\n----------\nwarning_class : the warning class\n The class to test for, e.g. UserWarning.\n\nfunc : callable\n Callable object to trigger warnings.\n\n*args : the positional arguments to `func`.\n\n**kw : the keyword arguments to `func`\n\nReturns\n-------\nresult : the return value of `func`" - }, - { - "name": "assert_warns_message", - "decorators": [], - "parameters": [ - { - "name": "warning_class", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class to test for, e.g. UserWarning." - }, - { - "name": "message", - "type": "Union[Callable, str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The message or a substring of the message to test for. If callable, it takes a string as the argument and will trigger an AssertionError if the callable returns `False`." - }, - { - "name": "func", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Callable object to trigger warnings." 
- }, - { - "name": "*args", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "**kw", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Test that a certain warning occurs and with a certain message.\n\nParameters\n----------\nwarning_class : the warning class\n The class to test for, e.g. UserWarning.\n\nmessage : str or callable\n The message or a substring of the message to test for. If callable,\n it takes a string as the argument and will trigger an AssertionError\n if the callable returns `False`.\n\nfunc : callable\n Callable object to trigger warnings.\n\n*args : the positional arguments to `func`.\n\n**kw : the keyword arguments to `func`.\n\nReturns\n-------\nresult : the return value of `func`" - }, - { - "name": "assert_warns_div0", - "decorators": [], - "parameters": [ - { - "name": "func", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "*args", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "**kw", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Assume that numpy's warning for divide by zero is raised.\n\nHandles the case of platforms that do not support warning on divide by\nzero.\n\nParameters\n----------\nfunc\n*args\n**kw" - }, - { - "name": "assert_no_warnings", - "decorators": [], - "parameters": [ - { - "name": "func", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "*args", - "type": null, - 
"hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "**kw", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Parameters\n----------\nfunc\n*args\n**kw" - }, - { - "name": "ignore_warnings", - "decorators": [], - "parameters": [ - { - "name": "obj", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "callable where you want to ignore the warnings." - }, - { - "name": "category", - "type": null, - "hasDefault": true, - "default": "Warning", - "limitation": null, - "ignored": false, - "docstring": "The category to filter. If Warning, all categories will be muted." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Context manager and decorator to ignore warnings.\n\nNote: Using this (in both variants) will clear all warnings\nfrom all python modules loaded. In case you need to test\ncross-module-warning-logging, this is not your tool of choice.\n\nParameters\n----------\nobj : callable, default=None\n callable where you want to ignore the warnings.\ncategory : warning class, default=Warning\n The category to filter. If Warning, all categories will be muted.\n\nExamples\n--------\n>>> with ignore_warnings():\n... warnings.warn('buhuhuhu')\n\n>>> def nasty_warn():\n... warnings.warn('buhuhuhu')\n... print(42)\n\n>>> ignore_warnings(nasty_warn)()\n42" - }, - { - "name": "assert_raise_message", - "decorators": [], - "parameters": [ - { - "name": "exceptions", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An Exception object." 
- }, - { - "name": "message", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The error message or a substring of the error message." - }, - { - "name": "function", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Callable object to raise error." - }, - { - "name": "*args", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "**kwargs", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper function to test the message raised in an exception.\n\nGiven an exception, a callable to raise the exception, and\na message string, tests that the correct exception is raised and\nthat the message is a substring of the error thrown. Used to test\nthat the specific message thrown during an exception is correct.\n\nParameters\n----------\nexceptions : exception or tuple of exception\n An Exception object.\n\nmessage : str\n The error message or a substring of the error message.\n\nfunction : callable\n Callable object to raise error.\n\n*args : the positional arguments to `function`.\n\n**kwargs : the keyword arguments to `function`." - }, - { - "name": "assert_allclose_dense_sparse", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "First array to compare." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Second array to compare." 
- }, - { - "name": "rtol", - "type": "float", - "hasDefault": true, - "default": "1e-07", - "limitation": null, - "ignored": false, - "docstring": "relative tolerance; see numpy.allclose." - }, - { - "name": "atol", - "type": "float", - "hasDefault": true, - "default": "1e-9", - "limitation": null, - "ignored": false, - "docstring": "absolute tolerance; see numpy.allclose. Note that the default here is more tolerant than the default for numpy.testing.assert_allclose, where atol=0." - }, - { - "name": "err_msg", - "type": "str", - "hasDefault": true, - "default": "''", - "limitation": null, - "ignored": false, - "docstring": "Error message to raise." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Assert allclose for sparse and dense data.\n\nBoth x and y need to be either sparse or dense, they\ncan't be mixed.\n\nParameters\n----------\nx : {array-like, sparse matrix}\n First array to compare.\n\ny : {array-like, sparse matrix}\n Second array to compare.\n\nrtol : float, default=1e-07\n relative tolerance; see numpy.allclose.\n\natol : float, default=1e-9\n absolute tolerance; see numpy.allclose. Note that the default here is\n more tolerant than the default for numpy.testing.assert_allclose, where\n atol=0.\n\nerr_msg : str, default=''\n Error message to raise." - }, - { - "name": "set_random_state", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Pseudo random number generator state. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set random state of an estimator if it has the `random_state` param.\n\nParameters\n----------\nestimator : object\n The estimator.\nrandom_state : int, RandomState instance or None, default=0\n Pseudo random number generator state.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `." - }, - { - "name": "check_skip_network", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_delete_folder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Utility function to cleanup a temporary folder if still existing.\n\nCopy from joblib.pool (for independence)." - }, - { - "name": "create_memmap_backed_data", - "decorators": [], - "parameters": [ - { - "name": "data", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "mmap_mode", - "type": "str", - "hasDefault": true, - "default": "'r'", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "return_folder", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Parameters\n----------\ndata\nmmap_mode : str, default='r'\nreturn_folder : bool, default=False" - }, - { - "name": "_get_args", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper to get function arguments." - }, - { - "name": "_get_func_name", - "decorators": [], - "parameters": [ - { - "name": "func", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The function object." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get function full name.\n\nParameters\n----------\nfunc : callable\n The function object.\n\nReturns\n-------\nname : str\n The function name." - }, - { - "name": "check_docstring_parameters", - "decorators": [], - "parameters": [ - { - "name": "func", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The function object to test." - }, - { - "name": "doc", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Docstring if it is passed manually to the test." - }, - { - "name": "ignore", - "type": "List", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to ignore." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper to check docstring.\n\nParameters\n----------\nfunc : callable\n The function object to test.\ndoc : str, default=None\n Docstring if it is passed manually to the test.\nignore : list, default=None\n Parameters to ignore.\n\nReturns\n-------\nincorrect : list\n A list of string describing the incorrect results." - }, - { - "name": "assert_run_python_script", - "decorators": [], - "parameters": [ - { - "name": "source_code", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The Python source code to execute." - }, - { - "name": "timeout", - "type": "int", - "hasDefault": true, - "default": "60", - "limitation": null, - "ignored": false, - "docstring": "Time in seconds before timeout." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Utility to check assertions in an independent Python subprocess.\n\nThe script provided in the source code should return 0 and not print\nanything on stderr or stdout.\n\nThis is a port from cloudpickle https://github.com/cloudpipe/cloudpickle\n\nParameters\n----------\nsource_code : str\n The Python source code to execute.\ntimeout : int, default=60\n Time in seconds before timeout." - }, - { - "name": "_convert_container", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "raises", - "decorators": [], - "parameters": [ - { - "name": "excepted_exc_type", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The exception that should be raised by the block. If a list, the block should raise one of the exceptions." - }, - { - "name": "match", - "type": "Union[List[str], str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A regex that the exception message should match. If a list, one of the entries must match. If None, match isn't enforced." - }, - { - "name": "may_pass", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the block is allowed to not raise an exception. Useful in cases where some estimators may support a feature but others must fail with an appropriate error message. By default, the context manager will raise an exception if the block does not raise an exception." - }, - { - "name": "err_msg", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If the context manager fails (e.g. the block fails to raise the proper exception, or fails to match), then an AssertionError is raised with this message. 
By default, an AssertionError is raised with a default error message (depends on the kind of failure). Use this to indicate how users should fix their estimators to pass the checks." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Context manager to ensure exceptions are raised within a code block.\n\nThis is similar to and inspired from pytest.raises, but supports a few\nother cases.\n\nThis is only intended to be used in estimator_checks.py where we don't\nwant to use pytest. In the rest of the code base, just use pytest.raises\ninstead.\n\nParameters\n----------\nexcepted_exc_type : Exception or list of Exception\n The exception that should be raised by the block. If a list, the block\n should raise one of the exceptions.\nmatch : str or list of str, default=None\n A regex that the exception message should match. If a list, one of\n the entries must match. If None, match isn't enforced.\nmay_pass : bool, default=False\n If True, the block is allowed to not raise an exception. Useful in\n cases where some estimators may support a feature but others must\n fail with an appropriate error message. By default, the context\n manager will raise an exception if the block does not raise an\n exception.\nerr_msg : str, default=None\n If the context manager fails (e.g. the block fails to raise the\n proper exception, or fails to match), then an AssertionError is\n raised with this message. By default, an AssertionError is raised\n with a default error message (depends on the kind of failure). Use\n this to indicate how users should fix their estimators to pass the\n checks.\n\nAttributes\n----------\nraised_and_matched : bool\n True if an exception was raised and a match was found, False otherwise." 
- } - ] - }, - { - "name": "sklearn.utils", - "imports": [ - "import pkgutil", - "import inspect", - "from importlib import import_module", - "from operator import itemgetter", - "from collections.abc import Sequence", - "from contextlib import contextmanager", - "from itertools import compress", - "from itertools import islice", - "import numbers", - "import platform", - "import struct", - "import timeit", - "from pathlib import Path", - "import warnings", - "import numpy as np", - "from scipy.sparse import issparse", - "from murmurhash import murmurhash3_32", - "from class_weight import compute_class_weight", - "from class_weight import compute_sample_weight", - "from None import _joblib", - "from exceptions import DataConversionWarning", - "from deprecation import deprecated", - "from fixes import np_version", - "from fixes import parse_version", - "from _estimator_html_repr import estimator_html_repr", - "from validation import as_float_array", - "from validation import assert_all_finite", - "from validation import check_random_state", - "from validation import column_or_1d", - "from validation import check_array", - "from validation import check_consistent_length", - "from validation import check_X_y", - "from validation import indexable", - "from validation import check_symmetric", - "from validation import check_scalar", - "from validation import _deprecate_positional_args", - "from None import get_config", - "import matplotlib", - "import pandas", - "from _testing import ignore_warnings", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import TransformerMixin", - "from base import ClusterMixin" - ], - "classes": [ - { - "name": "Bunch", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__setattr__", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__dir__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__getattr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__setstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Container object exposing keys as attributes.\n\nBunch objects are sometimes used as an output for functions and methods.\nThey extend dictionaries by enabling values to be accessed by key,\n`bunch[\"value_key\"]`, or by an attribute, `bunch.value_key`.\n\nExamples\n--------\n>>> b = Bunch(a=1, b=2)\n>>> b['b']\n2\n>>> b.b\n2\n>>> b.a = 3\n>>> b['a']\n3\n>>> b.c = 6\n>>> b['c']\n6" - } - ], - "functions": [ - { - "name": "safe_mask", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data on which to apply mask." - }, - { - "name": "mask", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Mask to be used on X." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Return a mask which is safe to use on X.\n\nParameters\n----------\nX : {array-like, sparse matrix}\n Data on which to apply mask.\n\nmask : ndarray\n Mask to be used on X.\n\nReturns\n-------\n mask" - }, - { - "name": "axis0_safe_slice", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data on which to apply mask." 
- }, - { - "name": "mask", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Mask to be used on X." - }, - { - "name": "len_mask", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The length of the mask." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "This mask is safer than safe_mask since it returns an\nempty array, when a sparse matrix is sliced with a boolean mask\nwith all False, instead of raising an unhelpful error in older\nversions of SciPy.\n\nSee: https://github.com/scipy/scipy/issues/5361\n\nAlso note that we can avoid doing the dot product by checking if\nthe len_mask is not zero in _huber_loss_and_gradient but this\nis not going to be the bottleneck, since the number of outliers\nand non_outliers are typically non-zero and it makes the code\ntougher to follow.\n\nParameters\n----------\nX : {array-like, sparse matrix}\n Data on which to apply mask.\n\nmask : ndarray\n Mask to be used on X.\n\nlen_mask : int\n The length of the mask.\n\nReturns\n-------\n mask" - }, - { - "name": "_array_indexing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Index an array or scipy.sparse consistently across NumPy version." - }, - { - "name": "_pandas_indexing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Index a pandas dataframe or a series." - }, - { - "name": "_list_indexing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Index a Python list." - }, - { - "name": "_determine_key_type", - "decorators": [], - "parameters": [ - { - "name": "key", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The key from which we want to infer the data type." 
- }, - { - "name": "accept_slice", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to raise an error if the key is a slice." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Determine the data type of key.\n\nParameters\n----------\nkey : scalar, slice or array-like\n The key from which we want to infer the data type.\n\naccept_slice : bool, default=True\n Whether or not to raise an error if the key is a slice.\n\nReturns\n-------\ndtype : {'int', 'str', 'bool', None}\n Returns the data type of key." - }, - { - "name": "_safe_indexing", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Union[List, ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data from which to sample rows, items or columns. `list` are only supported when `axis=0`." - }, - { - "name": "indices", - "type": "Union[int, bool, str, ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "- If `axis=0`, boolean and integer array-like, integer slice, and scalar integer are supported. - If `axis=1`: - to select a single column, `indices` can be of `int` type for all `X` types and `str` only for dataframe. The selected subset will be 1D, unless `X` is a sparse matrix in which case it will be 2D. - to select multiples columns, `indices` can be one of the following: `list`, `array`, `slice`. The type used in these containers can be one of the following: `int`, 'bool' and `str`. However, `str` is only supported when `X` is a dataframe. The selected subset will be 2D." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The axis along which `X` will be subsampled. `axis=0` will select rows while `axis=1` will select columns." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Return rows, items or columns of X using indices.\n\n.. warning::\n\n This utility is documented, but **private**. This means that\n backward compatibility might be broken without any deprecation\n cycle.\n\nParameters\n----------\nX : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series\n Data from which to sample rows, items or columns. `list` are only\n supported when `axis=0`.\nindices : bool, int, str, slice, array-like\n - If `axis=0`, boolean and integer array-like, integer slice,\n and scalar integer are supported.\n - If `axis=1`:\n - to select a single column, `indices` can be of `int` type for\n all `X` types and `str` only for dataframe. The selected subset\n will be 1D, unless `X` is a sparse matrix in which case it will\n be 2D.\n - to select multiples columns, `indices` can be one of the\n following: `list`, `array`, `slice`. The type used in\n these containers can be one of the following: `int`, 'bool' and\n `str`. However, `str` is only supported when `X` is a dataframe.\n The selected subset will be 2D.\naxis : int, default=0\n The axis along which `X` will be subsampled. `axis=0` will select\n rows while `axis=1` will select columns.\n\nReturns\n-------\nsubset\n Subset of X on axis 0 or 1.\n\nNotes\n-----\nCSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are\nnot supported." - }, - { - "name": "_get_column_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get feature column indices for input data X and key.\n\nFor accepted values of `key`, see the docstring of\n:func:`_safe_indexing_column`." 
- }, - { - "name": "resample", - "decorators": [], - "parameters": [ - { - "name": "*arrays", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indexable data-structures can be arrays, lists, dataframes or scipy sparse matrices with consistent first dimension." - }, - { - "name": "replace", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Implements resampling with replacement. If False, this will implement (sliced) random permutations." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to generate. If left to None this is automatically set to the first dimension of the arrays. If replace is False it should not be larger than the length of arrays." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "stratify", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, data is split in a stratified fashion, using this as the class labels." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Resample arrays or sparse matrices in a consistent way.\n\nThe default strategy implements one step of the bootstrapping\nprocedure.\n\nParameters\n----------\n*arrays : sequence of array-like of shape (n_samples,) or (n_samples, n_outputs)\n Indexable data-structures can be arrays, lists, dataframes or scipy\n sparse matrices with consistent first dimension.\n\nreplace : bool, default=True\n Implements resampling with replacement. 
If False, this will implement\n (sliced) random permutations.\n\nn_samples : int, default=None\n Number of samples to generate. If left to None this is\n automatically set to the first dimension of the arrays.\n If replace is False it should not be larger than the length of\n arrays.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for shuffling\n the data.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nstratify : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n If not None, data is split in a stratified fashion, using this as\n the class labels.\n\nReturns\n-------\nresampled_arrays : sequence of array-like of shape (n_samples,) or (n_samples, n_outputs)\n Sequence of resampled copies of the collections. The original arrays\n are not impacted.\n\nExamples\n--------\nIt is possible to mix sparse and dense arrays in the same run::\n\n >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])\n >>> y = np.array([0, 1, 2])\n\n >>> from scipy.sparse import coo_matrix\n >>> X_sparse = coo_matrix(X)\n\n >>> from sklearn.utils import resample\n >>> X, X_sparse, y = resample(X, X_sparse, y, random_state=0)\n >>> X\n array([[1., 0.],\n [2., 1.],\n [1., 0.]])\n\n >>> X_sparse\n <3x2 sparse matrix of type '<... 'numpy.float64'>'\n with 4 stored elements in Compressed Sparse Row format>\n\n >>> X_sparse.toarray()\n array([[1., 0.],\n [2., 1.],\n [1., 0.]])\n\n >>> y\n array([0, 1, 0])\n\n >>> resample(y, n_samples=2, random_state=0)\n array([0, 1])\n\nExample using stratification::\n\n >>> y = [0, 0, 1, 1, 1, 1, 1, 1, 1]\n >>> resample(y, n_samples=5, replace=False, stratify=y,\n ... 
random_state=0)\n [1, 1, 1, 0, 1]\n\nSee Also\n--------\nshuffle" - }, - { - "name": "shuffle", - "decorators": [], - "parameters": [ - { - "name": "*arrays", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indexable data-structures can be arrays, lists, dataframes or scipy sparse matrices with consistent first dimension." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to generate. If left to None this is automatically set to the first dimension of the arrays. It should not be larger than the length of arrays." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Shuffle arrays or sparse matrices in a consistent way.\n\nThis is a convenience alias to ``resample(*arrays, replace=False)`` to do\nrandom permutations of the collections.\n\nParameters\n----------\n*arrays : sequence of indexable data-structures\n Indexable data-structures can be arrays, lists, dataframes or scipy\n sparse matrices with consistent first dimension.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for shuffling\n the data.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nn_samples : int, default=None\n Number of samples to generate. If left to None this is\n automatically set to the first dimension of the arrays. 
It should\n not be larger than the length of arrays.\n\nReturns\n-------\nshuffled_arrays : sequence of indexable data-structures\n Sequence of shuffled copies of the collections. The original arrays\n are not impacted.\n\nExamples\n--------\nIt is possible to mix sparse and dense arrays in the same run::\n\n >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])\n >>> y = np.array([0, 1, 2])\n\n >>> from scipy.sparse import coo_matrix\n >>> X_sparse = coo_matrix(X)\n\n >>> from sklearn.utils import shuffle\n >>> X, X_sparse, y = shuffle(X, X_sparse, y, random_state=0)\n >>> X\n array([[0., 0.],\n [2., 1.],\n [1., 0.]])\n\n >>> X_sparse\n <3x2 sparse matrix of type '<... 'numpy.float64'>'\n with 3 stored elements in Compressed Sparse Row format>\n\n >>> X_sparse.toarray()\n array([[0., 0.],\n [2., 1.],\n [1., 0.]])\n\n >>> y\n array([2, 1, 0])\n\n >>> shuffle(y, n_samples=2, random_state=0)\n array([0, 1])\n\nSee Also\n--------\nresample" - }, - { - "name": "safe_sqr", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Union[NDArray, ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to create a copy of X and operate on it or to perform inplace computation (default behaviour)." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Element wise squaring of array-likes and sparse matrices.\n\nParameters\n----------\nX : {array-like, ndarray, sparse matrix}\n\ncopy : bool, default=True\n Whether to create a copy of X and operate on it or to perform\n inplace computation (default behaviour).\n\nReturns\n-------\nX ** 2 : element wise square" - }, - { - "name": "_chunk_generator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Chunk generator, ``gen`` into lists of length ``chunksize``. The last\nchunk may have a length less than ``chunksize``." - }, - { - "name": "gen_batches", - "decorators": [], - "parameters": [ - { - "name": "n", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of element in each batch." - }, - { - "name": "min_batch_size", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Minimum batch size to produce." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generator to create slices containing batch_size elements, from 0 to n.\n\nThe last slice may contain less than batch_size elements, when batch_size\ndoes not divide n.\n\nParameters\n----------\nn : int\nbatch_size : int\n Number of element in each batch.\nmin_batch_size : int, default=0\n Minimum batch size to produce.\n\nYields\n------\nslice of batch_size elements\n\nExamples\n--------\n>>> from sklearn.utils import gen_batches\n>>> list(gen_batches(7, 3))\n[slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]\n>>> list(gen_batches(6, 3))\n[slice(0, 3, None), slice(3, 6, None)]\n>>> list(gen_batches(2, 3))\n[slice(0, 2, None)]\n>>> list(gen_batches(7, 3, min_batch_size=0))\n[slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]\n>>> list(gen_batches(7, 3, min_batch_size=2))\n[slice(0, 3, None), slice(3, 7, None)]" - }, - { - "name": "gen_even_slices", - "decorators": [], - "parameters": [ - { - "name": "n", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "n_packs", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of slices to generate." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples. Pass n_samples when the slices are to be used for sparse matrix indexing; slicing off-the-end raises an exception, while it works for NumPy arrays." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generator to create n_packs slices going up to n.\n\nParameters\n----------\nn : int\nn_packs : int\n Number of slices to generate.\nn_samples : int, default=None\n Number of samples. 
Pass n_samples when the slices are to be used for\n sparse matrix indexing; slicing off-the-end raises an exception, while\n it works for NumPy arrays.\n\nYields\n------\nslice\n\nExamples\n--------\n>>> from sklearn.utils import gen_even_slices\n>>> list(gen_even_slices(10, 1))\n[slice(0, 10, None)]\n>>> list(gen_even_slices(10, 10))\n[slice(0, 1, None), slice(1, 2, None), ..., slice(9, 10, None)]\n>>> list(gen_even_slices(10, 5))\n[slice(0, 2, None), slice(2, 4, None), ..., slice(8, 10, None)]\n>>> list(gen_even_slices(10, 3))\n[slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]" - }, - { - "name": "tosequence", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Cast iterable x to a Sequence, avoiding a copy if possible.\n\nParameters\n----------\nx : iterable" - }, - { - "name": "_to_object_array", - "decorators": [], - "parameters": [ - { - "name": "sequence", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sequence to be converted." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convert sequence to a 1-D NumPy array of object dtype.\n\nnumpy.array constructor has a similar use but it's output\nis ambiguous. 
It can be 1-D NumPy array of object dtype if\nthe input is a ragged array, but if the input is a list of\nequal length arrays, then the output is a 2D numpy.array.\n_to_object_array solves this ambiguity by guarantying that\nthe output is a 1-D NumPy array of objects for any input.\n\nParameters\n----------\nsequence : array-like of shape (n_elements,)\n The sequence to be converted.\n\nReturns\n-------\nout : ndarray of shape (n_elements,), dtype=object\n The converted sequence into a 1-D NumPy array of object dtype.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.utils import _to_object_array\n>>> _to_object_array([np.array([0]), np.array([1])])\narray([array([0]), array([1])], dtype=object)\n>>> _to_object_array([np.array([0]), np.array([1, 2])])\narray([array([0]), array([1, 2])], dtype=object)\n>>> _to_object_array([np.array([0]), np.array([1, 2])])\narray([array([0]), array([1, 2])], dtype=object)" - }, - { - "name": "indices_to_mask", - "decorators": [], - "parameters": [ - { - "name": "indices", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of integers treated as indices." - }, - { - "name": "mask_length", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Length of boolean mask to be generated. This parameter must be greater than max(indices)." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Convert list of indices to boolean mask.\n\nParameters\n----------\nindices : list-like\n List of integers treated as indices.\nmask_length : int\n Length of boolean mask to be generated.\n This parameter must be greater than max(indices).\n\nReturns\n-------\nmask : 1d boolean nd-array\n Boolean array that is True where indices are present, else False.\n\nExamples\n--------\n>>> from sklearn.utils import indices_to_mask\n>>> indices = [1, 2 , 3, 4]\n>>> indices_to_mask(indices, 5)\narray([False, True, True, True, True])" - }, - { - "name": "_message_with_time", - "decorators": [], - "parameters": [ - { - "name": "source", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "String indicating the source or the reference of the message." - }, - { - "name": "message", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Short message." - }, - { - "name": "time", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Time in seconds." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Create one line message for logging purposes.\n\nParameters\n----------\nsource : str\n String indicating the source or the reference of the message.\n\nmessage : str\n Short message.\n\ntime : int\n Time in seconds." - }, - { - "name": "_print_elapsed_time", - "decorators": [], - "parameters": [ - { - "name": "source", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "String indicating the source or the reference of the message." - }, - { - "name": "message", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Short message. If None, nothing will be printed." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Log elapsed time to stdout when the context is exited.\n\nParameters\n----------\nsource : str\n String indicating the source or the reference of the message.\n\nmessage : str, default=None\n Short message. If None, nothing will be printed.\n\nReturns\n-------\ncontext_manager\n Prints elapsed time upon exit if verbose." - }, - { - "name": "get_chunk_n_rows", - "decorators": [], - "parameters": [ - { - "name": "row_bytes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The expected number of bytes of memory that will be consumed during the processing of each row." - }, - { - "name": "max_n_rows", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum return value." - }, - { - "name": "working_memory", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of rows to fit inside this number of MiB will be returned. When None (default), the value of ``sklearn.get_config()['working_memory']`` is used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculates how many rows can be processed within working_memory.\n\nParameters\n----------\nrow_bytes : int\n The expected number of bytes of memory that will be consumed\n during the processing of each row.\nmax_n_rows : int, default=None\n The maximum return value.\nworking_memory : int or float, default=None\n The number of rows to fit inside this number of MiB will be returned.\n When None (default), the value of\n ``sklearn.get_config()['working_memory']`` is used.\n\nReturns\n-------\nint or the value of n_samples\n\nWarns\n-----\nIssues a UserWarning if ``row_bytes`` exceeds ``working_memory`` MiB." 
- }, - { - "name": "is_scalar_nan", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Tests if x is NaN.\n\nThis function is meant to overcome the issue that np.isnan does not allow\nnon-numerical types as input, and that np.nan is not float('nan').\n\nParameters\n----------\nx : any type\n\nReturns\n-------\nboolean\n\nExamples\n--------\n>>> is_scalar_nan(np.nan)\nTrue\n>>> is_scalar_nan(float(\"nan\"))\nTrue\n>>> is_scalar_nan(None)\nFalse\n>>> is_scalar_nan(\"\")\nFalse\n>>> is_scalar_nan([np.nan])\nFalse" - }, - { - "name": "_approximate_mode", - "decorators": [], - "parameters": [ - { - "name": "class_counts", - "type": "NDArray[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Population per class." - }, - { - "name": "n_draws", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of draws (samples to draw) from the overall population." - }, - { - "name": "rng", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to break ties." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes approximate mode of multivariate hypergeometric.\n\nThis is an approximation to the mode of the multivariate\nhypergeometric given by class_counts and n_draws.\nIt shouldn't be off by more than one.\n\nIt is the mostly likely outcome of drawing n_draws many\nsamples from the population given by class_counts.\n\nParameters\n----------\nclass_counts : ndarray of int\n Population per class.\nn_draws : int\n Number of draws (samples to draw) from the overall population.\nrng : random state\n Used to break ties.\n\nReturns\n-------\nsampled_classes : ndarray of int\n Number of samples drawn from each class.\n np.sum(sampled_classes) == n_draws\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.utils import _approximate_mode\n>>> _approximate_mode(class_counts=np.array([4, 2]), n_draws=3, rng=0)\narray([2, 1])\n>>> _approximate_mode(class_counts=np.array([5, 2]), n_draws=4, rng=0)\narray([3, 1])\n>>> _approximate_mode(class_counts=np.array([2, 2, 2, 1]),\n... n_draws=2, rng=0)\narray([0, 1, 1, 0])\n>>> _approximate_mode(class_counts=np.array([2, 2, 2, 1]),\n... n_draws=2, rng=42)\narray([1, 1, 0, 0])" - }, - { - "name": "check_matplotlib_support", - "decorators": [], - "parameters": [ - { - "name": "caller_name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The name of the caller that requires matplotlib." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Raise ImportError with detailed error message if mpl is not installed.\n\nPlot utilities like :func:`plot_partial_dependence` should lazily import\nmatplotlib and call this helper before any computation.\n\nParameters\n----------\ncaller_name : str\n The name of the caller that requires matplotlib." 
- }, - { - "name": "check_pandas_support", - "decorators": [], - "parameters": [ - { - "name": "caller_name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The name of the caller that requires pandas." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Raise ImportError with detailed error message if pandas is not\ninstalled.\n\nPlot utilities like :func:`fetch_openml` should lazily import\npandas and call this helper before any computation.\n\nParameters\n----------\ncaller_name : str\n The name of the caller that requires pandas." - }, - { - "name": "all_estimators", - "decorators": [], - "parameters": [ - { - "name": "type_filter", - "type": "Literal[\"classifier\", \"regressor\", \"cluster\", \"transformer\"]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Which kind of estimators should be returned. If None, no filter is applied and all estimators are returned. Possible values are 'classifier', 'regressor', 'cluster' and 'transformer' to get estimators only of these specific types, or a list of these to get the estimators that fit at least one of the types." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get a list of all estimators from sklearn.\n\nThis function crawls the module and gets all classes that inherit\nfrom BaseEstimator. Classes that are defined in test-modules are not\nincluded.\n\nParameters\n----------\ntype_filter : {\"classifier\", \"regressor\", \"cluster\", \"transformer\"} or list of such str, default=None\n Which kind of estimators should be returned. If None, no filter is\n applied and all estimators are returned. 
Possible values are\n 'classifier', 'regressor', 'cluster' and 'transformer' to get\n estimators only of these specific types, or a list of these to\n get the estimators that fit at least one of the types.\n\nReturns\n-------\nestimators : list of tuples\n List of (name, class), where ``name`` is the class name as string\n and ``class`` is the actuall type of the class." - } - ] - }, - { - "name": "sklearn.utils.tests.conftest", - "imports": [ - "import pytest", - "import sklearn" - ], - "classes": [], - "functions": [ - { - "name": "print_changed_only_false", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_arpack", - "imports": [ - "import pytest", - "from numpy.testing import assert_allclose", - "from sklearn.utils import check_random_state", - "from sklearn.utils._arpack import _init_arpack_v0" - ], - "classes": [], - "functions": [ - { - "name": "test_init_arpack_v0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_class_weight", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.datasets import make_blobs", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.utils.class_weight import compute_class_weight", - "from sklearn.utils.class_weight import compute_sample_weight", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_compute_class_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_class_weight_not_present", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_compute_class_weight_dict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_class_weight_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_class_weight_balanced_negative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_class_weight_balanced_unordered", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_class_weight_default", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_sample_weight_with_subsample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_sample_weight_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_sample_weight_more_than_32", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_cython_blas", - "imports": [ - "import pytest", - "import numpy as np", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._cython_blas import _dot_memview", - "from sklearn.utils._cython_blas import _asum_memview", - "from sklearn.utils._cython_blas import _axpy_memview", - "from sklearn.utils._cython_blas import _nrm2_memview", - "from sklearn.utils._cython_blas import _copy_memview", - "from sklearn.utils._cython_blas 
import _scal_memview", - "from sklearn.utils._cython_blas import _rotg_memview", - "from sklearn.utils._cython_blas import _rot_memview", - "from sklearn.utils._cython_blas import _gemv_memview", - "from sklearn.utils._cython_blas import _ger_memview", - "from sklearn.utils._cython_blas import _gemm_memview", - "from sklearn.utils._cython_blas import RowMajor", - "from sklearn.utils._cython_blas import ColMajor", - "from sklearn.utils._cython_blas import Trans", - "from sklearn.utils._cython_blas import NoTrans" - ], - "classes": [], - "functions": [ - { - "name": "_numpy_to_cython", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_no_op", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dot", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_asum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_axpy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nrm2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_copy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rotg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rot", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gemv", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ger", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gemm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_deprecation", - "imports": [ - "import pickle", - "from sklearn.utils.deprecation import _is_deprecated", - "from sklearn.utils.deprecation import deprecated", - "from sklearn.utils._testing import assert_warns_message" - ], - "classes": [ - { - "name": "MockClass1", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - }, - { - "name": "MockClass2", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "MockClass3", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "MockClass4", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "mock_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { 
- "name": "test_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_is_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_encode", - "imports": [ - "import pickle", - "import numpy as np", - "import pytest", - "from numpy.testing import assert_array_equal", - "from sklearn.utils._encode import _unique", - "from sklearn.utils._encode import _encode", - "from sklearn.utils._encode import _check_unknown" - ], - "classes": [], - "functions": [ - { - "name": "test_encode_util", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_encode_with_check_unknown", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_assert_check_unknown", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_unknown", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_unknown_missing_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unique_util_missing_values_objects", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unique_util_missing_values_numeric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unique_util_with_all_missing_values", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_unknown_with_both_missing_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_estimator_checks", - "imports": [ - "import unittest", - "import sys", - "import numpy as np", - "import scipy.sparse as sp", - "import joblib", - "from sklearn.base import BaseEstimator", - "from sklearn.base import ClassifierMixin", - "from sklearn.utils import deprecated", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import MinimalClassifier", - "from sklearn.utils._testing import MinimalRegressor", - "from sklearn.utils._testing import MinimalTransformer", - "from sklearn.utils._testing import SkipTest", - "from sklearn.utils.estimator_checks import check_estimator", - "from sklearn.utils.estimator_checks import _NotAnArray", - "from sklearn.utils.estimator_checks import check_class_weight_balanced_linear_classifier", - "from sklearn.utils.estimator_checks import set_random_state", - "from sklearn.utils.estimator_checks import _set_checking_parameters", - "from sklearn.utils.estimator_checks import check_estimators_unfitted", - "from sklearn.utils.estimator_checks import check_fit_score_takes_y", - "from sklearn.utils.estimator_checks import check_no_attributes_set_in_init", - "from sklearn.utils.estimator_checks import check_classifier_data_not_an_array", - "from sklearn.utils.estimator_checks import check_regressor_data_not_an_array", - "from sklearn.utils.estimator_checks import check_estimator_get_tags_default_keys", - "from sklearn.utils.validation import check_is_fitted", - "from sklearn.utils.estimator_checks import 
check_outlier_corruption", - "from sklearn.utils.fixes import np_version", - "from sklearn.utils.fixes import parse_version", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import SGDClassifier", - "from sklearn.mixture import GaussianMixture", - "from sklearn.cluster import MiniBatchKMeans", - "from sklearn.decomposition import NMF", - "from sklearn.linear_model import MultiTaskElasticNet", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.svm import SVC", - "from sklearn.svm import NuSVC", - "from sklearn.neighbors import KNeighborsRegressor", - "from sklearn.utils.validation import check_array", - "from sklearn.utils import all_estimators", - "from sklearn.exceptions import SkipTestWarning", - "from pandas import Series", - "from sklearn.preprocessing import LabelEncoder", - "from sklearn.utils import compute_class_weight", - "from pandas import DataFrame", - "from sklearn.datasets import load_iris" - ], - "classes": [ - { - "name": "CorrectNotFittedError", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Exception class to raise if estimator is used before fitting.\n\nLike NotFittedError, it inherits from ValueError, but not from\nAttributeError. Used for testing only." 
- }, - { - "name": "BaseBadClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "ChangesDict", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "SetsWrongAttribute", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "ChangesWrongAttribute", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "ChangesUnderscoreAttribute", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "RaisesErrorInSetParams", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "HasMutableParameters", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "HasImmutableParameters", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "ModifiesValueInsteadOfRaisingError", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "ModifiesAnotherValue", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoCheckinPredict", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoSparseClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "CorrectNotFittedErrorClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoSampleWeightPandasSeriesType", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "BadBalancedWeightsClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "BadTransformerWithoutMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NotInvariantPredict", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NotInvariantSampleOrder", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "LargeSparseNotSupportedClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "SparseTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "EstimatorInconsistentForPandas", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "UntaggedBinaryClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "TaggedBinaryClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "EstimatorMissingDefaultTags", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_get_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "RequiresPositiveYRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - 
{ - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "PoorScoreLogisticRegression", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_not_an_array_array_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_fit_score_takes_y_works_on_deprecated_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_outlier_corruption", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_estimator_transformer_no_mixin", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_estimator_clones", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_estimators_unfitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_check_no_attributes_set_in_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_estimator_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_classifier_data_not_an_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_regressor_data_not_an_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_estimator_get_tags_default_keys", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "run_tests_without_pytest", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Runs the tests in this file without using pytest.\n " - }, - { - "name": "test_check_class_weight_balanced_linear_classifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_all_estimators_all_public", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_xfail_ignored_in_check_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minimal_class_implementation_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_estimator_html_repr", - "imports": [ - "from contextlib import closing", - "from io import StringIO", - "import pytest", - "from sklearn import config_context", - "from 
sklearn.linear_model import LogisticRegression", - "from sklearn.neural_network import MLPClassifier", - "from sklearn.impute import SimpleImputer", - "from sklearn.decomposition import PCA", - "from sklearn.decomposition import TruncatedSVD", - "from sklearn.pipeline import Pipeline", - "from sklearn.pipeline import FeatureUnion", - "from sklearn.compose import ColumnTransformer", - "from sklearn.ensemble import VotingClassifier", - "from sklearn.feature_selection import SelectPercentile", - "from sklearn.cluster import Birch", - "from sklearn.cluster import AgglomerativeClustering", - "from sklearn.preprocessing import OneHotEncoder", - "from sklearn.svm import LinearSVC", - "from sklearn.svm import LinearSVR", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.multiclass import OneVsOneClassifier", - "from sklearn.ensemble import StackingClassifier", - "from sklearn.ensemble import StackingRegressor", - "from sklearn.gaussian_process import GaussianProcessRegressor", - "from sklearn.gaussian_process.kernels import RationalQuadratic", - "from sklearn.utils._estimator_html_repr import _write_label_html", - "from sklearn.utils._estimator_html_repr import _get_visual_block", - "from sklearn.utils._estimator_html_repr import estimator_html_repr" - ], - "classes": [], - "functions": [ - { - "name": "test_write_label_html", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_visual_block_single_str_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_visual_block_single_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_visual_block_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_get_visual_block_feature_union", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_visual_block_voting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_visual_block_column_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimator_html_repr_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_classsifer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_birch_duck_typing_meta", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_classifier_duck_typing_meta", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_duck_typing_nested_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_estimator_print_change_only", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_extmath", - "imports": [ - "import numpy as np", - "from scipy import sparse", - "from scipy import linalg", - "from scipy import stats", - "from scipy.special import expit", - "import pytest", - "from sklearn.utils import gen_batches", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing 
import assert_allclose", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import skip_if_32bit", - "from sklearn.utils.extmath import density", - "from sklearn.utils.extmath import _safe_accumulator_op", - "from sklearn.utils.extmath import randomized_svd", - "from sklearn.utils.extmath import row_norms", - "from sklearn.utils.extmath import weighted_mode", - "from sklearn.utils.extmath import cartesian", - "from sklearn.utils.extmath import log_logistic", - "from sklearn.utils.extmath import svd_flip", - "from sklearn.utils.extmath import _incremental_mean_and_var", - "from sklearn.utils.extmath import _incremental_weighted_mean_and_var", - "from sklearn.utils.extmath import _deterministic_vector_sign_flip", - "from sklearn.utils.extmath import softmax", - "from sklearn.utils.extmath import stable_cumsum", - "from sklearn.utils.extmath import safe_sparse_dot", - "from sklearn.datasets import make_low_rank_matrix" - ], - "classes": [], - "functions": [ - { - "name": "test_density", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_uniform_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_randomized_svd_low_rank", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_randomized_svd_low_rank_all_dtypes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_row_norms", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_randomized_svd_low_rank_with_noise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_randomized_svd_infinite_rank", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_randomized_svd_transpose_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_randomized_svd_power_iteration_normalizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_randomized_svd_sparse_warnings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svd_flip", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_randomized_svd_sign_flip", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_randomized_svd_sign_flip_with_transpose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cartesian", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_sigmoid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "rng", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_incremental_weighted_mean_and_variance_simple", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_weighted_mean_and_variance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_weighted_mean_and_variance_ignore_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_variance_update_formulas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_mean_and_variance_ignore_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_variance_numerical_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_variance_ddof", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vector_sign_flip", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_softmax", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stable_cumsum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_sparse_dot_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_sparse_dot_nd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_safe_sparse_dot_2d_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_sparse_dot_dense_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_fast_dict", - "imports": [ - "import numpy as np", - "from sklearn.utils._fast_dict import IntFloatDict", - "from sklearn.utils._fast_dict import argmin" - ], - "classes": [], - "functions": [ - { - "name": "test_int_float_dict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_int_float_dict_argmin", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_fixes", - "imports": [ - "import math", - "import numpy as np", - "import pytest", - "import scipy.stats", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils.fixes import _joblib_parallel_args", - "from sklearn.utils.fixes import _object_dtype_isnan", - "from sklearn.utils.fixes import loguniform", - "from sklearn.utils.fixes import MaskedArray", - "import joblib" - ], - "classes": [], - "functions": [ - { - "name": "test_joblib_parallel_args", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_object_dtype_isnan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_loguniform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_masked_array_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": 
"sklearn.utils.tests.test_metaestimators", - "imports": [ - "from sklearn.utils.metaestimators import if_delegate_has_method" - ], - "classes": [ - { - "name": "Prefix", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "MockMetaEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "This is a mock delegated function" - } - ], - "docstring": "This is a mock meta estimator" - }, - { - "name": "MetaEst", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A mock meta estimator" - }, - { - "name": "MetaEstTestTuple", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A mock meta estimator to test passing a tuple of delegates" - }, - { - "name": "MetaEstTestList", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A mock meta estimator to test passing a list of delegates" - }, - { - "name": "HasPredict", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A mock sub-estimator with predict method" - }, - { - "name": "HasNoPredict", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "A mock sub-estimator with no predict method" - } - ], - "functions": [ - { - "name": "test_delegated_docstring", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_if_delegate_has_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_mocking", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy import sparse", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_allclose", - "from sklearn.datasets import load_iris", - "from sklearn.utils import check_array", - "from sklearn.utils import _safe_indexing", - "from sklearn.utils._testing import _convert_container", - "from sklearn.utils._mocking import CheckingClassifier" - ], - "classes": [], - "functions": [ - { - "name": "iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "_success", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_on_fit_success", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_on_fit_fail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_X_on_predict_success", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_X_on_predict_fail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checking_classifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checking_classifier_with_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checking_classifier_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checking_classifier_missing_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checking_classifier_methods_to_check", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_multiclass", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "from itertools import product", - "import pytest", - "from scipy.sparse import 
issparse", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import coo_matrix", - "from scipy.sparse import dok_matrix", - "from scipy.sparse import lil_matrix", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils.estimator_checks import _NotAnArray", - "from sklearn.utils.fixes import parse_version", - "from sklearn.utils.multiclass import unique_labels", - "from sklearn.utils.multiclass import is_multilabel", - "from sklearn.utils.multiclass import type_of_target", - "from sklearn.utils.multiclass import class_distribution", - "from sklearn.utils.multiclass import check_classification_targets", - "from sklearn.utils.multiclass import _ovr_decision_function", - "from sklearn.utils.metaestimators import _safe_split", - "from sklearn.model_selection import ShuffleSplit", - "from sklearn.svm import SVC", - "from sklearn import datasets" - ], - "classes": [], - "functions": [ - { - "name": "test_unique_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unique_labels_non_specific", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unique_labels_mixed_types", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_is_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_classification_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_type_of_target", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "test_type_of_target_pandas_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_distribution", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_split_with_precomputed_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_murmurhash", - "imports": [ - "import numpy as np", - "from sklearn.utils.murmurhash import murmurhash3_32", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_mmhash3_int", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mmhash3_int_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mmhash3_bytes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mmhash3_unicode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_collision_on_byte_range", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_uniform_distribution", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_optimize", - "imports": [ - "import numpy as 
np", - "from sklearn.utils.optimize import _newton_cg", - "from scipy.optimize import fmin_ncg", - "from sklearn.utils._testing import assert_array_almost_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_newton_cg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_parallel", - "imports": [ - "from distutils.version import LooseVersion", - "import pytest", - "from joblib import Parallel", - "import joblib", - "from numpy.testing import assert_array_equal", - "from sklearn._config import config_context", - "from sklearn._config import get_config", - "from sklearn.utils.fixes import delayed" - ], - "classes": [], - "functions": [ - { - "name": "get_working_memory", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_configuration_passes_through_to_joblib", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_pprint", - "imports": [ - "import re", - "from pprint import PrettyPrinter", - "import numpy as np", - "from sklearn.utils._pprint import _EstimatorPrettyPrinter", - "from sklearn.linear_model import LogisticRegressionCV", - "from sklearn.pipeline import make_pipeline", - "from sklearn.base import BaseEstimator", - "from sklearn.base import TransformerMixin", - "from sklearn.feature_selection import SelectKBest", - "from sklearn.feature_selection import chi2", - "from sklearn import set_config", - "from sklearn import config_context" - ], - "classes": [ - { - "name": "LogisticRegression", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "StandardScaler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "RFE", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "GridSearchCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "CountVectorizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "Pipeline", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "SVC", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "PCA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": 
null - }, - { - "name": "NMF", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "SimpleImputer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_basic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_changed_only", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_deeply_nested", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearch_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_max_elements_to_show", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bruteforce_ellipsis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_builtin_prettyprinter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kwargs_in_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, 
- "returnType": "Any", - "docstring": null - }, - { - "name": "test_complexity_print_changed_only", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_random", - "imports": [ - "import numpy as np", - "import pytest", - "import scipy.sparse as sp", - "from scipy.special import comb", - "from numpy.testing import assert_array_almost_equal", - "from sklearn.utils.random import _random_choice_csc", - "from sklearn.utils.random import sample_without_replacement", - "from sklearn.utils._random import _our_rand_r_py" - ], - "classes": [], - "functions": [ - { - "name": "test_invalid_sample_without_replacement_algorithm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_without_replacement_algorithms", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_edge_case_of_sample_int", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sample_int", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sample_int_distribution", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_choice_csc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_choice_csc_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_our_rand_r", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": 
"sklearn.utils.tests.test_seq_dataset", - "imports": [ - "import numpy as np", - "import pytest", - "import scipy.sparse as sp", - "from numpy.testing import assert_array_equal", - "from sklearn.utils._seq_dataset import ArrayDataset32", - "from sklearn.utils._seq_dataset import ArrayDataset64", - "from sklearn.utils._seq_dataset import CSRDataset32", - "from sklearn.utils._seq_dataset import CSRDataset64", - "from sklearn.datasets import load_iris", - "from sklearn.utils._testing import assert_allclose" - ], - "classes": [], - "functions": [ - { - "name": "assert_csr_equal_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "make_dense_dataset_32", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "make_dense_dataset_64", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "make_sparse_dataset_32", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "make_sparse_dataset_64", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_seq_dataset_basic_iteration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_seq_dataset_shuffle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fused_types_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_buffer_dtype_mismatch_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": 
"sklearn.utils.tests.test_shortest_path", - "imports": [ - "from collections import defaultdict", - "import numpy as np", - "from numpy.testing import assert_array_almost_equal", - "from sklearn.utils.graph import graph_shortest_path", - "from sklearn.utils.graph import single_source_shortest_path_length" - ], - "classes": [], - "functions": [ - { - "name": "floyd_warshall_slow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "generate_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_floyd_warshall", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dijkstra", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shortest_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dijkstra_bug_fix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_show_versions", - "imports": [ - "from sklearn.utils._show_versions import _get_sys_info", - "from sklearn.utils._show_versions import _get_deps_info", - "from sklearn.utils._show_versions import show_versions", - "from sklearn.utils._testing import ignore_warnings" - ], - "classes": [], - "functions": [ - { - "name": "test_get_sys_info", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_deps_info", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_show_versions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_sparsefuncs", - "imports": [ - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "from scipy import linalg", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal", - "from numpy.random import RandomState", - "from sklearn.datasets import make_classification", - "from sklearn.utils.sparsefuncs import mean_variance_axis", - "from sklearn.utils.sparsefuncs import incr_mean_variance_axis", - "from sklearn.utils.sparsefuncs import inplace_column_scale", - "from sklearn.utils.sparsefuncs import inplace_row_scale", - "from sklearn.utils.sparsefuncs import inplace_swap_row", - "from sklearn.utils.sparsefuncs import inplace_swap_column", - "from sklearn.utils.sparsefuncs import min_max_axis", - "from sklearn.utils.sparsefuncs import count_nonzero", - "from sklearn.utils.sparsefuncs import csc_median_axis_0", - "from sklearn.utils.sparsefuncs_fast import assign_rows_csr", - "from sklearn.utils.sparsefuncs_fast import inplace_csr_row_normalize_l1", - "from sklearn.utils.sparsefuncs_fast import inplace_csr_row_normalize_l2", - "from sklearn.utils.sparsefuncs_fast import csr_row_norms", - "from sklearn.utils._testing import assert_allclose" - ], - "classes": [], - "functions": [ - { - "name": "test_mean_variance_axis0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_variance_axis1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incr_mean_variance_axis_weighted_axis1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incr_mean_variance_axis_weighted_axis0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_incr_mean_variance_axis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incr_mean_variance_axis_dim_mismatch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that we raise proper error when axis=1 and the dimension mismatch.\nNon-regression test for:\nhttps://github.com/scikit-learn/scikit-learn/pull/18655" - }, - { - "name": "test_incr_mean_variance_axis_equivalence_mean_variance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incr_mean_variance_no_new_n", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incr_mean_variance_n_float", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incr_mean_variance_axis_ignore_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_variance_illegal_axis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_densify_rows", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inplace_column_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inplace_row_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inplace_swap_row", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_inplace_swap_column", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_max", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_max_axis_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_count_nonzero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_csc_row_median", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inplace_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_csr_row_norms", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_stats", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_allclose", - "from pytest import approx", - "from sklearn.utils.stats import _weighted_percentile" - ], - "classes": [], - "functions": [ - { - "name": "test_weighted_percentile", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weighted_percentile_equal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weighted_percentile_zero_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weighted_median_equal_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_weighted_median_integer_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weighted_percentile_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_tags", - "imports": [ - "import pytest", - "from sklearn.base import BaseEstimator", - "from sklearn.utils._tags import _DEFAULT_TAGS", - "from sklearn.utils._tags import _safe_tags" - ], - "classes": [ - { - "name": "NoTagsEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - }, - { - "name": "MoreTagsEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_safe_tags_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_tags_no_get_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_testing", - "imports": [ - "import warnings", - "import unittest", - "import sys", - "import os", - "import atexit", - "import numpy as np", - "from scipy import sparse", - "import pytest", - "from sklearn.utils.deprecation import deprecated", - "from sklearn.utils.metaestimators import if_delegate_has_method", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing 
import assert_warns", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import set_random_state", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import check_docstring_parameters", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils._testing import TempMemmap", - "from sklearn.utils._testing import create_memmap_backed_data", - "from sklearn.utils._testing import _delete_folder", - "from sklearn.utils._testing import _convert_container", - "from sklearn.utils._testing import raises", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis" - ], - "classes": [ - { - "name": "TestWarns", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "test_warn", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warn_wrong_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "Klass", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "f_missing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "f_bad_sections", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Function f\n\nParameter\n----------\na : int\n Parameter a\nb : float\n Parameter 
b\n\nResults\n-------\nc : list\n Parameter c" - } - ], - "docstring": null - }, - { - "name": "MockEst", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "MockEstimator" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "MockMetaEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "MetaEstimator to check if doctest on delegated methods work.\n\nParameters\n---------\ndelegate : estimator\n Delegated estimator." 
- }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter y" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "This is available only if delegate has predict.\n\nParameters\n----------\ny : ndarray\n Parameter y" - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "This is available only if delegate has score.\n\nParameters\n---------\ny : ndarray\n Parameter y" - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "This is available only if delegate has predict_proba.\n\nParameters\n---------\nX : ndarray\n Parameter X" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Incorrect docstring but should not be tested" - } - ], - "docstring": null - }, - { - "name": "RegistrationCounter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_set_random_state", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_assert_allclose_dense_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_assert_raises_msg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_assert_raise_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ignore_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "f_ok", - "decorators": [], - "parameters": [ - { - "name": "a", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter a" - }, - { - "name": "b", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter b" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Function f\n\nParameters\n----------\na : int\n Parameter a\nb : float\n Parameter b\n\nReturns\n-------\nc : list\n Parameter c" - }, - { - "name": "f_bad_sections", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Function f\n\nParameters\n----------\na : int\n Parameter a\nb : float\n Parameter b\n\nResults\n-------\nc : list\n Parameter c" - }, - { - "name": "f_bad_order", - "decorators": [], - "parameters": [ - { - "name": "a", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter a" - }, - { - "name": "b", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter b" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Function f\n\nParameters\n----------\na : int\n Parameter a\nb : float\n Parameter b\n\nReturns\n-------\nc : list\n Parameter c" - }, - { - "name": "f_too_many_param_docstring", - "decorators": [], - "parameters": [ - { - "name": "a", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter a" - }, - { - "name": "b", - "type": 
"int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter b" - }, - { - "name": "c", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter c" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Function f\n\nParameters\n----------\na : int\n Parameter a\nb : int\n Parameter b\nc : int\n Parameter c\n\nReturns\n-------\nd : list\n Parameter c" - }, - { - "name": "f_missing", - "decorators": [], - "parameters": [ - { - "name": "a", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter a" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Function f\n\nParameters\n----------\na : int\n Parameter a\n\nReturns\n-------\nc : list\n Parameter c" - }, - { - "name": "f_check_param_definition", - "decorators": [], - "parameters": [ - { - "name": "a: int", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter a" - }, - { - "name": "b:", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter b" - }, - { - "name": "c :", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter c" - }, - { - "name": "d:int", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter d" - }, - { - "name": "e", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "No typespec is allowed without colon" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Function f\n\nParameters\n----------\na: int\n Parameter a\nb:\n Parameter b\nc :\n Parameter c\nd:int\n Parameter d\ne\n No 
typespec is allowed without colon" - }, - { - "name": "test_check_docstring_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_memmap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tempmemmap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_create_memmap_backed_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_convert_container", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_utils", - "imports": [ - "from copy import copy", - "from itertools import chain", - "import warnings", - "import string", - "import timeit", - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import _convert_container", - "from sklearn.utils import check_random_state", - "from sklearn.utils import _determine_key_type", - "from sklearn.utils import deprecated", - "from sklearn.utils import gen_batches", - "from sklearn.utils import _get_column_indices", - "from sklearn.utils import resample", - "from sklearn.utils import safe_mask", - "from sklearn.utils import column_or_1d", - "from sklearn.utils import _safe_indexing", - "from sklearn.utils import shuffle", - "from sklearn.utils import 
gen_even_slices", - "from sklearn.utils import _message_with_time", - "from sklearn.utils import _print_elapsed_time", - "from sklearn.utils import get_chunk_n_rows", - "from sklearn.utils import is_scalar_nan", - "from sklearn.utils import _to_object_array", - "from sklearn.utils._mocking import MockDataFrame", - "from sklearn import config_context", - "from sklearn.utils import parallel_backend", - "from sklearn.utils import register_parallel_backend", - "from sklearn.utils._joblib import joblib" - ], - "classes": [], - "functions": [ - { - "name": "test_make_rng", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gen_batches", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_resample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_resample_stratified", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_resample_stratified_replace", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_resample_stratify_2dy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_resample_stratify_sparse_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_or_1d", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_determine_key_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_determine_key_type_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_determine_key_type_slice_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_2d_container_axis_0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_1d_container", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_2d_container_axis_1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_2d_read_only_axis_1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_1d_container_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_2d_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_2d_scalar_axis_0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_1d_scalar", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_2d_scalar_axis_1", - "decorators": [], - "parameters": [], - "hasReturnType": false, 
- "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_None_axis_0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_pandas_no_matching_cols_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_error_axis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_1d_array_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_container_axis_0_unsupported_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_column_indices_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_column_indices_pandas_nonunique_columns_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle_on_ndim_equals_three", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle_dont_convert_to_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gen_even_slices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_chunk_n_rows", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_message_with_time", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_print_elapsed_time", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_is_scalar_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "dummy_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_deprecation_joblib_api", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_to_object_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_validation", - "imports": [ - "import warnings", - "import os", - "from tempfile import NamedTemporaryFile", - "from itertools import product", - "from operator import itemgetter", - "import pytest", - "from pytest import importorskip", - "import numpy as np", - "import scipy.sparse as sp", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import SkipTest", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils import as_float_array", - "from sklearn.utils import check_array", - "from sklearn.utils import check_symmetric", - "from sklearn.utils import check_X_y", - "from sklearn.utils import deprecated", - "from sklearn.utils._mocking import MockDataFrame", - "from sklearn.utils.fixes import np_version", - "from sklearn.utils.fixes import parse_version", - "from sklearn.utils.estimator_checks import _NotAnArray", - "from sklearn.random_projection import _sparse_random_matrix", - "from 
sklearn.linear_model import ARDRegression", - "from sklearn.neighbors import KNeighborsClassifier", - "from sklearn.ensemble import RandomForestRegressor", - "from sklearn.svm import SVR", - "from sklearn.datasets import make_blobs", - "from sklearn.utils import _safe_indexing", - "from sklearn.utils.validation import has_fit_parameter", - "from sklearn.utils.validation import check_is_fitted", - "from sklearn.utils.validation import check_consistent_length", - "from sklearn.utils.validation import assert_all_finite", - "from sklearn.utils.validation import check_memory", - "from sklearn.utils.validation import check_non_negative", - "from sklearn.utils.validation import _num_samples", - "from sklearn.utils.validation import check_scalar", - "from sklearn.utils.validation import _check_psd_eigenvalues", - "from sklearn.utils.validation import _deprecate_positional_args", - "from sklearn.utils.validation import _check_sample_weight", - "from sklearn.utils.validation import _allclose_dense_sparse", - "from sklearn.utils.validation import FLOAT_DTYPES", - "from sklearn.utils.validation import _check_fit_params", - "import sklearn", - "from sklearn.exceptions import NotFittedError", - "from sklearn.exceptions import PositiveSpectrumWarning", - "from sklearn.utils._testing import TempMemmap", - "import pandas as pd" - ], - "classes": [ - { - "name": "DummyMemory", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "cache", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "WrongDummyMemory", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - } 
- ], - "functions": [ - { - "name": "test_as_float_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_as_float_array_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_np_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_memmap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_force_all_finite_valid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_force_all_finiteinvalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_force_all_finite_object", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_force_all_finite_object_unsafe_casting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_numeric_warns", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that check_array warns when it converts a bytes/string into a\nfloat." 
- }, - { - "name": "test_check_array_dtype_numeric_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Error when string-ike array can not be converted" - }, - { - "name": "test_check_array_pandas_na_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_pandas_dtype_object_conversion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_pandas_dtype_casting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_on_mock_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_dtype_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_dtype_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_accept_sparse_type_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_accept_sparse_no_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "X_64bit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_accept_large_sparse_no_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_accept_large_sparse_raise_exception", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_min_samples_and_features_messages", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_complex_data_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_has_fit_parameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_symmetric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_is_fitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_is_fitted_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_is_fitted_with_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_consistent_length", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_dataframe_fit_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_suppress_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_series", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_dataframe_mixed_float_dtypes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_check_memory", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_memmap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_non_negative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_X_y_informative_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_retrieve_samples_from_non_standard_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_scalar_valid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that check_scalar returns no error/warning if valid inputs are\nprovided" - }, - { - "name": "test_check_scalar_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that check_scalar returns the right error if a wrong input is\ngiven" - }, - { - "name": "test_check_psd_eigenvalues_valid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_psd_eigenvalues_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_allclose_dense_sparse_equals", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_allclose_dense_sparse_not_equals", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_allclose_dense_sparse_raise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_deprecate_positional_args_warns_for_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_deprecate_positional_args_warns_for_function_version", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_deprecate_positional_args_warns_for_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_sparse_pandas_sp_format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_pandas_sparse_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "check that we raise an error with dataframe having\nsparse extension arrays with unsupported mixed dtype\nand pandas version below 1.1. pandas versions 1.1 and\nabove fixed this issue so no error will be raised." 
- }, - { - "name": "test_check_pandas_sparse_valid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn._build_utils.openmp_helpers", - "imports": [ - "import os", - "import sys", - "import textwrap", - "import warnings", - "import subprocess", - "from distutils.errors import CompileError", - "from distutils.errors import LinkError", - "from pre_build_helpers import compile_test_program" - ], - "classes": [], - "functions": [ - { - "name": "get_openmp_flag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_openmp_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check whether OpenMP test code can be compiled and run" - } - ] - }, - { - "name": "sklearn._build_utils.pre_build_helpers", - "imports": [ - "import os", - "import sys", - "import glob", - "import tempfile", - "import textwrap", - "import setuptools", - "import subprocess", - "from distutils.dist import Distribution", - "from distutils.sysconfig import customize_compiler", - "from numpy.distutils.ccompiler import new_compiler", - "from numpy.distutils.command.config_compiler import config_cc" - ], - "classes": [], - "functions": [ - { - "name": "_get_compiler", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get a compiler equivalent to the one that will be used to build sklearn\n\nHandles compiler specified as follows:\n - python setup.py build_ext --compiler=\n - CC= python setup.py build_ext" - }, - { - "name": "compile_test_program", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that some C code can be compiled and run" - }, - { - "name": 
"basic_check_build", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check basic compilation and linking of C code" - } - ] - }, - { - "name": "sklearn._build_utils", - "imports": [ - "import os", - "import sklearn", - "import contextlib", - "from distutils.version import LooseVersion", - "from pre_build_helpers import basic_check_build", - "from openmp_helpers import check_openmp_support", - "from _min_dependencies import CYTHON_MIN_VERSION", - "import Cython", - "from Cython.Build import cythonize", - "import joblib", - "from Cython import Tempita" - ], - "classes": [], - "functions": [ - { - "name": "_check_cython_version", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "cythonize_extensions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that a recent Cython is available and cythonize extensions" - }, - { - "name": "gen_from_templates", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate cython files from a list of templates" - } - ] - }, - { - "name": "sklearn._loss.glm_distribution", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "from collections import namedtuple", - "import numbers", - "import numpy as np", - "from scipy.special import xlogy" - ], - "classes": [ - { - "name": "ExponentialDispersionModel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "in_y_range", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns ``True`` if y is in the valid range of Y~EDM.\n\nParameters\n----------\ny : array of shape (n_samples,)\n Target values." - }, - { - "name": "unit_variance", - "decorators": [], - "parameters": [ - { - "name": "y_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted mean." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the unit variance function.\n\nThe unit variance :math:`v(y_\\textrm{pred})` determines the variance as\na function of the mean :math:`y_\\textrm{pred}` by\n:math:`\\mathrm{Var}[Y_i] = \\phi/s_i*v(y_\\textrm{pred}_i)`.\nIt can also be derived from the unit deviance\n:math:`d(y,y_\\textrm{pred})` as\n\n.. math:: v(y_\\textrm{pred}) = \\frac{2}{\n \\frac{\\partial^2 d(y,y_\\textrm{pred})}{\n \\partialy_\\textrm{pred}^2}}\\big|_{y=y_\\textrm{pred}}\n\nSee also :func:`variance`.\n\nParameters\n----------\ny_pred : array of shape (n_samples,)\n Predicted mean." - }, - { - "name": "unit_deviance", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "y_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted mean." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True raise an exception on invalid y or y_pred values, otherwise they will be propagated as NaN." 
- }, - { - "name": "Returns", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "-------", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "deviance: array of shape (n_samples", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Computed deviance" - }, - { - "name": ")", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Computed deviance" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the unit deviance.\n\nThe unit_deviance :math:`d(y,y_\\textrm{pred})` can be defined by the\nlog-likelihood as\n:math:`d(y,y_\\textrm{pred}) = -2\\phi\\cdot\n\\left(loglike(y,y_\\textrm{pred},\\phi) - loglike(y,y,\\phi)\\right).`\n\nParameters\n----------\ny : array of shape (n_samples,)\n Target values.\n\ny_pred : array of shape (n_samples,)\n Predicted mean.\n\ncheck_input : bool, default=False\n If True raise an exception on invalid y or y_pred values, otherwise\n they will be propagated as NaN.\nReturns\n-------\ndeviance: array of shape (n_samples,)\n Computed deviance" - }, - { - "name": "unit_deviance_derivative", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "y_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted mean." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the derivative of the unit deviance w.r.t. 
y_pred.\n\nThe derivative of the unit deviance is given by\n:math:`\\frac{\\partial}{\\partialy_\\textrm{pred}}d(y,y_\\textrm{pred})\n = -2\\frac{y-y_\\textrm{pred}}{v(y_\\textrm{pred})}`\nwith unit variance :math:`v(y_\\textrm{pred})`.\n\nParameters\n----------\ny : array of shape (n_samples,)\n Target values.\n\ny_pred : array of shape (n_samples,)\n Predicted mean." - }, - { - "name": "deviance", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "y_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted mean." - }, - { - "name": "weights", - "type": "Union[Array, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Weights or exposure to which variance is inverse proportional." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the deviance.\n\nThe deviance is a weighted sum of the per sample unit deviances,\n:math:`D = \\sum_i s_i \\cdot d(y_i, y_\\textrm{pred}_i)`\nwith weights :math:`s_i` and unit deviance\n:math:`d(y,y_\\textrm{pred})`.\nIn terms of the log-likelihood it is :math:`D = -2\\phi\\cdot\n\\left(loglike(y,y_\\textrm{pred},\\frac{phi}{s})\n- loglike(y,y,\\frac{phi}{s})\\right)`.\n\nParameters\n----------\ny : array of shape (n_samples,)\n Target values.\n\ny_pred : array of shape (n_samples,)\n Predicted mean.\n\nweights : {int, array of shape (n_samples,)}, default=1\n Weights or exposure to which variance is inverse proportional." - }, - { - "name": "deviance_derivative", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." 
- }, - { - "name": "y_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted mean." - }, - { - "name": "weights", - "type": "Union[Array, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Weights or exposure to which variance is inverse proportional." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the derivative of the deviance w.r.t. y_pred.\n\nIt gives :math:`\\frac{\\partial}{\\partial y_\\textrm{pred}}\nD(y, \\y_\\textrm{pred}; weights)`.\n\nParameters\n----------\ny : array, shape (n_samples,)\n Target values.\n\ny_pred : array, shape (n_samples,)\n Predicted mean.\n\nweights : {int, array of shape (n_samples,)}, default=1\n Weights or exposure to which variance is inverse proportional." - } - ], - "docstring": "Base class for reproductive Exponential Dispersion Models (EDM).\n\nThe pdf of :math:`Y\\sim \\mathrm{EDM}(y_\\textrm{pred}, \\phi)` is given by\n\n.. math:: p(y| \\theta, \\phi) = c(y, \\phi)\n \\exp\\left(\\frac{\\theta y-A(\\theta)}{\\phi}\\right)\n = \\tilde{c}(y, \\phi)\n \\exp\\left(-\\frac{d(y, y_\\textrm{pred})}{2\\phi}\\right)\n\nwith mean :math:`\\mathrm{E}[Y] = A'(\\theta) = y_\\textrm{pred}`,\nvariance :math:`\\mathrm{Var}[Y] = \\phi \\cdot v(y_\\textrm{pred})`,\nunit variance :math:`v(y_\\textrm{pred})` and\nunit deviance :math:`d(y,y_\\textrm{pred})`.\n\nMethods\n-------\ndeviance\ndeviance_derivative\nin_y_range\nunit_deviance\nunit_deviance_derivative\nunit_variance\n\nReferences\n----------\nhttps://en.wikipedia.org/wiki/Exponential_dispersion_model." 
- }, - { - "name": "TweedieDistribution", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "power", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The variance power of the `unit_variance` :math:`v(y_\\textrm{pred}) = y_\\textrm{pred}^{power}`. For ``0 { - cs.push(new PythonClass(c.name, c.decorators, [], c.docstring || "", c.methods)); + cs.push(new PythonClass(c.name, c.decorators, [], c.fullDocstring || "", c.methods)); }); let fs: PythonFunction[] = []; @@ -27,10 +27,10 @@ export default class PythonPackageBuilder { let ps: PythonParameter[] = []; // @ts-ignore f.parameters.forEach(p => { - ps.push(new PythonParameter(p.name, p.type, p.hasDefault, p.defaultValue, p.limitation, p.ignored, p.docstring)); + ps.push(new PythonParameter(p.name, p.type, p.hasDefault, p.defaultValue, p.limitation, p.ignored, p.description)); }); - fs.push(new PythonFunction(f.name, f.decorators, ps, f.hasReturnType, new PythonReturnType(), f.docstring)) + fs.push(new PythonFunction(f.name, f.decorators, ps, f.hasReturnType, new PythonReturnType(), f.fullDocstring)) }); ms.push(new PythonModule(m.name, m.imports, cs, fs)) diff --git a/client/src/model/PythonParameter.ts b/client/src/model/PythonParameter.ts index 6e04effe4..3710f5163 100644 --- a/client/src/model/PythonParameter.ts +++ b/client/src/model/PythonParameter.ts @@ -7,15 +7,15 @@ export default class PythonParameter { readonly defaultValue: string;//ToDo maybe new default class readonly limitation: null;//Todo ueberall null?? 
readonly ignored: boolean; - readonly docstring: string; + readonly description: string; - constructor(name: string, type: string, hasDefault: boolean, defaultValue: string, limitation: null, ignored: boolean, docstring: string) { + constructor(name: string, type: string, hasDefault: boolean, defaultValue: string, limitation: null, ignored: boolean, description: string) { this.name = name; this.type = type; this.hasDefault = hasDefault; this.defaultValue = defaultValue; this.limitation = limitation; this.ignored = ignored; - this.docstring = docstring; + this.description = description; } } \ No newline at end of file diff --git a/client/src/sklearn.json b/client/src/sklearn.json deleted file mode 100644 index 880c4981e..000000000 --- a/client/src/sklearn.json +++ /dev/null @@ -1,127935 +0,0 @@ -{ - "name": "sklearn", - "modules": [ - { - "name": "sklearn.base", - "imports": [ - "import copy", - "import warnings", - "from collections import defaultdict", - "import platform", - "import inspect", - "import re", - "import numpy as np", - "from None import __version__", - "from _config import get_config", - "from utils import _IS_32BIT", - "from utils._tags import _DEFAULT_TAGS", - "from utils._tags import _safe_tags", - "from utils.validation import check_X_y", - "from utils.validation import check_array", - "from utils._estimator_html_repr import estimator_html_repr", - "from utils.validation import _deprecate_positional_args", - "from utils._pprint import _EstimatorPrettyPrinter", - "from metrics import accuracy_score", - "from metrics import r2_score" - ], - "classes": [ - { - "name": "BaseEstimator", - "decorators": [], - "superclasses": ["hallo", "klasse"], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_get_param_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get parameter names 
for the estimator" - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get parameters for this estimator.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values." - }, - { - "name": "set_params", - "decorators": [], - "parameters": [ - { - "name": "**params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator parameters." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Set the parameters of this estimator.\n\nThe method works on simple estimators as well as on nested objects\n(such as :class:`~sklearn.pipeline.Pipeline`). The latter have\nparameters of the form ``__`` so that it's\npossible to update each component of a nested object.\n\nParameters\n----------\n**params : dict\n Estimator parameters.\n\nReturns\n-------\nself : estimator instance\n Estimator instance." 
- }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__getstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__setstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_n_features", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - }, - { - "name": "reset", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If True, the `n_features_in_` attribute is set to `X.shape[1]`. If False and the attribute exists, then check that it is equal to `X.shape[1]`. If False and the attribute does *not* exist, then the check is skipped. .. note:: It is recommended to call reset=True in `fit` and in the first call to `partial_fit`. All other methods that validate `X` should set `reset=False`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set the `n_features_in_` attribute, or check against it.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input samples.\nreset : bool\n If True, the `n_features_in_` attribute is set to `X.shape[1]`.\n If False and the attribute exists, then check that it is equal to\n `X.shape[1]`. If False and the attribute does *not* exist, then\n the check is skipped.\n .. 
note::\n It is recommended to call reset=True in `fit` and in the first\n call to `partial_fit`. All other methods that validate `X`\n should set `reset=False`." - }, - { - "name": "_validate_data", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "'no_validation'", - "limitation": null, - "ignored": false, - "docstring": "The targets. - If `None`, `check_array` is called on `X`. If the estimator's requires_y tag is True, then an error will be raised. - If `'no_validation'`, `check_array` is called on `X` and the estimator's requires_y tag is ignored. This is a default placeholder and is never meant to be explicitly set. - Otherwise, both `X` and `y` are checked with either `check_array` or `check_X_y` depending on `validate_separately`." - }, - { - "name": "reset", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to reset the `n_features_in_` attribute. If False, the input will be checked for consistency with data provided when reset was last True. .. note:: It is recommended to call reset=True in `fit` and in the first call to `partial_fit`. All other methods that validate `X` should set `reset=False`." - }, - { - "name": "validate_separately", - "type": "Union[Literal[False], Tuple[]]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Only used if y is not None. If False, call validate_X_y(). Else, it must be a tuple of kwargs to be used for calling check_array() on X and y respectively." 
- }, - { - "name": "**check_params", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to :func:`sklearn.utils.check_array` or :func:`sklearn.utils.check_X_y`. Ignored if validate_separately is not False." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate input data and set or check the `n_features_in_` attribute.\n\nParameters\n----------\nX : {array-like, sparse matrix, dataframe} of shape (n_samples, n_features)\n The input samples.\ny : array-like of shape (n_samples,), default='no_validation'\n The targets.\n\n - If `None`, `check_array` is called on `X`. If the estimator's\n requires_y tag is True, then an error will be raised.\n - If `'no_validation'`, `check_array` is called on `X` and the\n estimator's requires_y tag is ignored. This is a default\n placeholder and is never meant to be explicitly set.\n - Otherwise, both `X` and `y` are checked with either `check_array`\n or `check_X_y` depending on `validate_separately`.\n\nreset : bool, default=True\n Whether to reset the `n_features_in_` attribute.\n If False, the input will be checked for consistency with data\n provided when reset was last True.\n .. note::\n It is recommended to call reset=True in `fit` and in the first\n call to `partial_fit`. All other methods that validate `X`\n should set `reset=False`.\nvalidate_separately : False or tuple of dicts, default=False\n Only used if y is not None.\n If False, call validate_X_y(). Else, it must be a tuple of kwargs\n to be used for calling check_array() on X and y respectively.\n**check_params : kwargs\n Parameters passed to :func:`sklearn.utils.check_array` or\n :func:`sklearn.utils.check_X_y`. Ignored if validate_separately\n is not False.\n\nReturns\n-------\nout : {ndarray, sparse matrix} or tuple of these\n The validated input. A tuple is returned if `y` is not None." 
- }, - { - "name": "_repr_html_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "HTML representation of estimator.\n\nThis is redundant with the logic of `_repr_mimebundle_`. The latter\nshould be favorted in the long term, `_repr_html_` is only\nimplemented for consumers who do not interpret `_repr_mimbundle_`." - }, - { - "name": "_repr_html_inner", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "This function is returned by the @property `_repr_html_` to make\n`hasattr(estimator, \"_repr_html_\") return `True` or `False` depending\non `get_config()[\"display\"]`." - }, - { - "name": "_repr_mimebundle_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mime bundle used by jupyter kernels to display estimator" - } - ], - "docstring": "Base class for all estimators in scikit-learn.\n\nNotes\n-----\nAll estimators should specify all the parameters that can be set\nat the class level in their ``__init__`` as explicit keyword\narguments (no ``*args`` or ``**kwargs``)." - }, - { - "name": "ClassifierMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels for `X`." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the mean accuracy on the given test data and labels.\n\nIn multi-label classification, this is the subset accuracy\nwhich is a harsh metric since you require for each sample that\neach label set be correctly predicted.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for `X`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Mean accuracy of ``self.predict(X)`` wrt. `y`." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mixin class for all classifiers in scikit-learn." - }, - { - "name": "RegressorMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples. For some estimators this may be a precomputed kernel matrix or a list of generic objects instead with shape ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted`` is the number of samples used in the fitting for the estimator." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True values for `X`." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the coefficient of determination :math:`R^2` of the\nprediction.\n\nThe coefficient :math:`R^2` is defined as :math:`(1 - \\frac{u}{v})`,\nwhere :math:`u` is the residual sum of squares ``((y_true - y_pred)\n** 2).sum()`` and :math:`v` is the total sum of squares ``((y_true -\ny_true.mean()) ** 2).sum()``. The best possible score is 1.0 and it\ncan be negative (because the model can be arbitrarily worse). A\nconstant model that always predicts the expected value of `y`,\ndisregarding the input features, would get a :math:`R^2` score of\n0.0.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples. For some estimators this may be a precomputed\n kernel matrix or a list of generic objects instead with shape\n ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted``\n is the number of samples used in the fitting for the estimator.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True values for `X`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n :math:`R^2` of ``self.predict(X)`` wrt. `y`.\n\nNotes\n-----\nThe :math:`R^2` score used when calling ``score`` on a regressor uses\n``multioutput='uniform_average'`` from version 0.23 to keep consistent\nwith default value of :func:`~sklearn.metrics.r2_score`.\nThis influences the ``score`` method of all the multioutput\nregressors (except for\n:class:`~sklearn.multioutput.MultiOutputRegressor`)." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mixin class for all regression estimators in scikit-learn." 
- }, - { - "name": "ClusterMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform clustering on `X` and returns cluster labels.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,), dtype=np.int64\n Cluster labels." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mixin class for all cluster estimators in scikit-learn." - }, - { - "name": "BiclusterMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "biclusters_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convenient way to get row and column indicators together.\n\nReturns the ``rows_`` and ``columns_`` members." - }, - { - "name": "get_indices", - "decorators": [], - "parameters": [ - { - "name": "i", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index of the cluster." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Row and column indices of the `i`'th bicluster.\n\nOnly works if ``rows_`` and ``columns_`` attributes exist.\n\nParameters\n----------\ni : int\n The index of the cluster.\n\nReturns\n-------\nrow_ind : ndarray, dtype=np.intp\n Indices of rows in the dataset that belong to the bicluster.\ncol_ind : ndarray, dtype=np.intp\n Indices of columns in the dataset that belong to the bicluster." - }, - { - "name": "get_shape", - "decorators": [], - "parameters": [ - { - "name": "i", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index of the cluster." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Shape of the `i`'th bicluster.\n\nParameters\n----------\ni : int\n The index of the cluster.\n\nReturns\n-------\nn_rows : int\n Number of rows in the bicluster.\n\nn_cols : int\n Number of columns in the bicluster." - }, - { - "name": "get_submatrix", - "decorators": [], - "parameters": [ - { - "name": "i", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index of the cluster." - }, - { - "name": "data", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the submatrix corresponding to bicluster `i`.\n\nParameters\n----------\ni : int\n The index of the cluster.\ndata : array-like of shape (n_samples, n_features)\n The data.\n\nReturns\n-------\nsubmatrix : ndarray of shape (n_rows, n_cols)\n The submatrix corresponding to bicluster `i`.\n\nNotes\n-----\nWorks with sparse matrices. Only works if ``rows_`` and\n``columns_`` attributes exist." - } - ], - "docstring": "Mixin class for all bicluster estimators in scikit-learn." 
- }, - { - "name": "TransformerMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values (None for unsupervised transformations)." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional fit parameters." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit to data, then transform it.\n\nFits transformer to `X` and `y` with optional parameters `fit_params`\nand returns a transformed version of `X`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input samples.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n Target values (None for unsupervised transformations).\n\n**fit_params : dict\n Additional fit parameters.\n\nReturns\n-------\nX_new : ndarray array of shape (n_samples, n_features_new)\n Transformed array." - } - ], - "docstring": "Mixin class for all transformers in scikit-learn." - }, - { - "name": "DensityMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the score of the model on the data `X`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nscore : float" - } - ], - "docstring": "Mixin class for all density estimators in scikit-learn." - }, - { - "name": "OutlierMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform fit on X and returns labels for X.\n\nReturns -1 for outliers and 1 for inliers.\n\nParameters\n----------\nX : {array-like, sparse matrix, dataframe} of shape (n_samples, n_features)\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n 1 for inliers, -1 for outliers." - } - ], - "docstring": "Mixin class for all outlier detection estimators in scikit-learn." 
- }, - { - "name": "MetaEstimatorMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - }, - { - "name": "MultiOutputMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mixin to mark estimators that support multioutput." - }, - { - "name": "_UnstableArchMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mark estimators that are non-determinstic on 32bit or PowerPC" - } - ], - "functions": [ - { - "name": "clone", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": "Union[List, Tuple[], Set]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator or group of estimators to be cloned." - }, - { - "name": "safe", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If safe is False, clone will fall back to a deep copy on objects that are not estimators." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Constructs a new unfitted estimator with the same parameters.\n\nClone does a deep copy of the model in an estimator\nwithout actually copying attached data. 
It yields a new estimator\nwith the same parameters that has not been fitted on any data.\n\nIf the estimator's `random_state` parameter is an integer (or if the\nestimator doesn't have a `random_state` parameter), an *exact clone* is\nreturned: the clone and the original estimator will give the exact same\nresults. Otherwise, *statistical clone* is returned: the clone might\nyield different results from the original estimator. More details can be\nfound in :ref:`randomness`.\n\nParameters\n----------\nestimator : {list, tuple, set} of estimator instance or a single estimator instance\n The estimator or group of estimators to be cloned.\n\nsafe : bool, default=True\n If safe is False, clone will fall back to a deep copy on objects\n that are not estimators." - }, - { - "name": "_pprint", - "decorators": [], - "parameters": [ - { - "name": "params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dictionary to pretty print" - }, - { - "name": "offset", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The offset in characters to add at the begin of each line." 
- }, - { - "name": "printer", - "type": "Callable", - "hasDefault": true, - "default": "repr", - "limitation": null, - "ignored": false, - "docstring": "The function to convert entries to strings, typically the builtin str or repr" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Pretty print the dictionary 'params'\n\nParameters\n----------\nparams : dict\n The dictionary to pretty print\n\noffset : int, default=0\n The offset in characters to add at the begin of each line.\n\nprinter : callable, default=repr\n The function to convert entries to strings, typically\n the builtin str or repr" - }, - { - "name": "is_classifier", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator object to test." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return True if the given estimator is (probably) a classifier.\n\nParameters\n----------\nestimator : object\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if estimator is a classifier and False otherwise." - }, - { - "name": "is_regressor", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator object to test." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return True if the given estimator is (probably) a regressor.\n\nParameters\n----------\nestimator : estimator instance\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if estimator is a regressor and False otherwise." - }, - { - "name": "is_outlier_detector", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator object to test." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return True if the given estimator is (probably) an outlier detector.\n\nParameters\n----------\nestimator : estimator instance\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if estimator is an outlier detector and False otherwise." - }, - { - "name": "_is_pairwise", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator object to test." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns True if estimator is pairwise.\n\n- If the `_pairwise` attribute and the tag are present and consistent,\n then use the value and not issue a warning.\n- If the `_pairwise` attribute and the tag are present and not\n consistent, use the `_pairwise` value and issue a deprecation\n warning.\n- If only the `_pairwise` attribute is present and it is not False,\n issue a deprecation warning and use the `_pairwise` value.\n\nParameters\n----------\nestimator : object\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if the estimator is pairwise and False otherwise." 
- } - ] - }, - { - "name": "sklearn.calibration", - "imports": [ - "import warnings", - "from inspect import signature", - "from contextlib import suppress", - "from functools import partial", - "from math import log", - "import numpy as np", - "from joblib import Parallel", - "from scipy.special import expit", - "from scipy.special import xlogy", - "from scipy.optimize import fmin_bfgs", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import clone", - "from base import MetaEstimatorMixin", - "from preprocessing import label_binarize", - "from preprocessing import LabelEncoder", - "from utils import check_array", - "from utils import column_or_1d", - "from utils import deprecated", - "from utils import indexable", - "from utils.multiclass import check_classification_targets", - "from utils.fixes import delayed", - "from utils.validation import check_is_fitted", - "from utils.validation import check_consistent_length", - "from utils.validation import _check_sample_weight", - "from pipeline import Pipeline", - "from isotonic import IsotonicRegression", - "from svm import LinearSVC", - "from model_selection import check_cv", - "from model_selection import cross_val_predict", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "CalibratedClassifierCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The classifier whose output need to be calibrated to provide more accurate `predict_proba` outputs. The default classifier is a :class:`~sklearn.svm.LinearSVC`." 
- }, - { - "name": "method", - "type": "Literal['sigmoid', 'isotonic']", - "hasDefault": true, - "default": "'sigmoid'", - "limitation": null, - "ignored": false, - "docstring": "The method to use for calibration. Can be 'sigmoid' which corresponds to Platt's method (i.e. a logistic regression model) or 'isotonic' which is a non-parametric approach. It is not advised to use isotonic calibration with too few calibration samples ``(<<1000)`` since it tends to overfit." - }, - { - "name": "cv", - "type": "Union[Literal[\"prefit\"], int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if ``y`` is binary or multiclass, :class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is neither binary nor multiclass, :class:`~sklearn.model_selection.KFold` is used. Refer to the :ref:`User Guide ` for the various cross-validation strategies that can be used here. If \"prefit\" is passed, it is assumed that `base_estimator` has been fitted already and all data is used for calibration. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. Base estimator clones are fitted in parallel across cross-validation iterations. Therefore parallelism happens only when `cv != \"prefit\"`. See :term:`Glossary ` for more details. .. 
versionadded:: 0.24" - }, - { - "name": "ensemble", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Determines how the calibrator is fitted when `cv` is not `'prefit'`. Ignored if `cv='prefit'`. If `True`, the `base_estimator` is fitted using training data and calibrated using testing data, for each `cv` fold. The final estimator is an ensemble of `n_cv` fitted classifer and calibrator pairs, where `n_cv` is the number of cross-validation folds. The output is the average predicted probabilities of all pairs. If `False`, `cv` is used to compute unbiased predictions, via :func:`~sklearn.model_selection.cross_val_predict`, which are then used for calibration. At prediction time, the classifier used is the `base_estimator` trained on all the data. Note that this method is also internally implemented in :mod:`sklearn.svm` estimators with the `probabilities=True` parameter. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the calibrated model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, then samples are equally weighted.\n\nReturns\n-------\nself : object\n Returns an instance of self." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calibrated probabilities of classification.\n\nThis function returns calibrated probabilities of classification\naccording to each class on an array of test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The samples.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n The predicted probas." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the target of new samples. The predicted class is the\nclass that has the highest probability, and can thus be different\nfrom the prediction of the uncalibrated classifier.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The samples.\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n The predicted class." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Probability calibration with isotonic regression or logistic regression.\n\nThis class uses cross-validation to both estimate the parameters of a\nclassifier and subsequently calibrate a classifier. With default\n`ensemble=True`, for each cv split it\nfits a copy of the base estimator to the training subset, and calibrates it\nusing the testing subset. 
For prediction, predicted probabilities are\naveraged across these individual calibrated classifiers. When\n`ensemble=False`, cross-validation is used to obtain unbiased predictions,\nvia :func:`~sklearn.model_selection.cross_val_predict`, which are then\nused for calibration. For prediction, the base estimator, trained using all\nthe data, is used. This is the method implemented when `probabilities=True`\nfor :mod:`sklearn.svm` estimators.\n\nAlready fitted classifiers can be calibrated via the parameter\n`cv=\"prefit\"`. In this case, no cross-validation is used and all provided\ndata is used for calibration. The user has to take care manually that data\nfor model fitting and calibration are disjoint.\n\nThe calibration is based on the :term:`decision_function` method of the\n`base_estimator` if it exists, else on :term:`predict_proba`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nbase_estimator : estimator instance, default=None\n The classifier whose output need to be calibrated to provide more\n accurate `predict_proba` outputs. The default classifier is\n a :class:`~sklearn.svm.LinearSVC`.\n\nmethod : {'sigmoid', 'isotonic'}, default='sigmoid'\n The method to use for calibration. Can be 'sigmoid' which\n corresponds to Platt's method (i.e. a logistic regression model) or\n 'isotonic' which is a non-parametric approach. It is not advised to\n use isotonic calibration with too few calibration samples\n ``(<<1000)`` since it tends to overfit.\n\ncv : int, cross-validation generator, iterable or \"prefit\", default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if ``y`` is binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used. 
If ``y`` is\n neither binary nor multiclass, :class:`~sklearn.model_selection.KFold`\n is used.\n\n Refer to the :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n If \"prefit\" is passed, it is assumed that `base_estimator` has been\n fitted already and all data is used for calibration.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors.\n\n Base estimator clones are fitted in parallel across cross-validation\n iterations. Therefore parallelism happens only when `cv != \"prefit\"`.\n\n See :term:`Glossary ` for more details.\n\n .. versionadded:: 0.24\n\nensemble : bool, default=True\n Determines how the calibrator is fitted when `cv` is not `'prefit'`.\n Ignored if `cv='prefit'`.\n\n If `True`, the `base_estimator` is fitted using training data and\n calibrated using testing data, for each `cv` fold. The final estimator\n is an ensemble of `n_cv` fitted classifer and calibrator pairs, where\n `n_cv` is the number of cross-validation folds. The output is the\n average predicted probabilities of all pairs.\n\n If `False`, `cv` is used to compute unbiased predictions, via\n :func:`~sklearn.model_selection.cross_val_predict`, which are then\n used for calibration. At prediction time, the classifier used is the\n `base_estimator` trained on all the data.\n Note that this method is also internally implemented in\n :mod:`sklearn.svm` estimators with the `probabilities=True` parameter.\n\n .. 
versionadded:: 0.24\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n The class labels.\n\ncalibrated_classifiers_ : list (len() equal to cv or 1 if `cv=\"prefit\"` or `ensemble=False`)\n The list of classifier and calibrator pairs.\n\n - When `cv=\"prefit\"`, the fitted `base_estimator` and fitted\n calibrator.\n - When `cv` is not \"prefit\" and `ensemble=True`, `n_cv` fitted\n `base_estimator` and calibrator pairs. `n_cv` is the number of\n cross-validation folds.\n - When `cv` is not \"prefit\" and `ensemble=False`, the `base_estimator`,\n fitted on all the data, and fitted calibrator.\n\n .. versionchanged:: 0.24\n Single calibrated classifier case when `ensemble=False`.\n\nExamples\n--------\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.naive_bayes import GaussianNB\n>>> from sklearn.calibration import CalibratedClassifierCV\n>>> X, y = make_classification(n_samples=100, n_features=2,\n... n_redundant=0, random_state=42)\n>>> base_clf = GaussianNB()\n>>> calibrated_clf = CalibratedClassifierCV(base_estimator=base_clf, cv=3)\n>>> calibrated_clf.fit(X, y)\nCalibratedClassifierCV(base_estimator=GaussianNB(), cv=3)\n>>> len(calibrated_clf.calibrated_classifiers_)\n3\n>>> calibrated_clf.predict_proba(X)[:5, :]\narray([[0.110..., 0.889...],\n [0.072..., 0.927...],\n [0.928..., 0.071...],\n [0.928..., 0.071...],\n [0.071..., 0.928...]])\n\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_classification(n_samples=100, n_features=2,\n... n_redundant=0, random_state=42)\n>>> X_train, X_calib, y_train, y_calib = train_test_split(\n... X, y, random_state=42\n... )\n>>> base_clf = GaussianNB()\n>>> base_clf.fit(X_train, y_train)\nGaussianNB()\n>>> calibrated_clf = CalibratedClassifierCV(\n... base_estimator=base_clf,\n... cv=\"prefit\"\n... 
)\n>>> calibrated_clf.fit(X_calib, y_calib)\nCalibratedClassifierCV(base_estimator=GaussianNB(), cv='prefit')\n>>> len(calibrated_clf.calibrated_classifiers_)\n1\n>>> calibrated_clf.predict_proba([[-0.5, 0.5]])\narray([[0.936..., 0.063...]])\n\nReferences\n----------\n.. [1] Obtaining calibrated probability estimates from decision trees\n and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001\n\n.. [2] Transforming Classifier Scores into Accurate Multiclass\n Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002)\n\n.. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to\n Regularized Likelihood Methods, J. Platt, (1999)\n\n.. [4] Predicting Good Probabilities with Supervised Learning,\n A. Niculescu-Mizil & R. Caruana, ICML 2005" - }, - { - "name": "_CalibratedClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fitted classifier." - }, - { - "name": "calibrators", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of fitted calibrators (either 'IsotonicRegression' or '_SigmoidCalibration'). The number of calibrators equals the number of classes. However, if there are 2 classes, the list contains only one fitted calibrator." - }, - { - "name": "classes", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "All the prediction classes." - }, - { - "name": "method", - "type": "Literal['sigmoid', 'isotonic']", - "hasDefault": true, - "default": "'sigmoid'", - "limitation": null, - "ignored": false, - "docstring": "The method to use for calibration. 
Can be 'sigmoid' which corresponds to Platt's method or 'isotonic' which is a non-parametric approach based on isotonic regression." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "calibrators_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sample data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate calibrated probabilities.\n\nCalculates classification calibrated probabilities\nfor each class, in a one-vs-all manner, for `X`.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The sample data.\n\nReturns\n-------\nproba : array, shape (n_samples, n_classes)\n The predicted probabilities. Can be exact zeros." - } - ], - "docstring": "Pipeline-like chaining a fitted classifier and its fitted calibrators.\n\nParameters\n----------\nbase_estimator : estimator instance\n Fitted classifier.\n\ncalibrators : list of fitted estimator instances\n List of fitted calibrators (either 'IsotonicRegression' or\n '_SigmoidCalibration'). The number of calibrators equals the number of\n classes. However, if there are 2 classes, the list contains only one\n fitted calibrator.\n\nclasses : array-like of shape (n_classes,)\n All the prediction classes.\n\nmethod : {'sigmoid', 'isotonic'}, default='sigmoid'\n The method to use for calibration. Can be 'sigmoid' which\n corresponds to Platt's method or 'isotonic' which is a\n non-parametric approach based on isotonic regression.\n\nAttributes\n----------\ncalibrators_ : list of fitted estimator instances\n Same as `calibrators`. Exposed for backward-compatibility. Use\n `calibrators` instead.\n\n .. 
deprecated:: 0.24\n `calibrators_` is deprecated from 0.24 and will be removed in\n 1.1 (renaming of 0.26). Use `calibrators` instead." - }, - { - "name": "_SigmoidCalibration", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training target." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples,)\n Training data.\n\ny : array-like of shape (n_samples,)\n Training target.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\nself : object\n Returns an instance of self." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "T", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to predict from." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict new data by linear interpolation.\n\nParameters\n----------\nT : array-like of shape (n_samples,)\n Data to predict from.\n\nReturns\n-------\nT_ : ndarray of shape (n_samples,)\n The predicted data." 
- } - ], - "docstring": "Sigmoid regression model.\n\nAttributes\n----------\na_ : float\n The slope.\n\nb_ : float\n The intercept." - } - ], - "functions": [ - { - "name": "_fit_classifier_calibrator_pair", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Cloned base estimator." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets." - }, - { - "name": "train", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of the training subset." - }, - { - "name": "test", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of the testing subset." - }, - { - "name": "supports_sw", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether or not the `estimator` supports sample weights." - }, - { - "name": "method", - "type": "Literal['sigmoid', 'isotonic']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Method to use for calibration." - }, - { - "name": "classes", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target classes." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights for `X`." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit a classifier/calibration pair on a given train/test split.\n\nFit the classifier on the train set, compute its predictions on the test\nset and use the predictions as input to fit the calibrator along with the\ntest labels.\n\nParameters\n----------\nestimator : estimator instance\n Cloned base estimator.\n\nX : array-like, shape (n_samples, n_features)\n Sample data.\n\ny : array-like, shape (n_samples,)\n Targets.\n\ntrain : ndarray, shape (n_train_indicies,)\n Indices of the training subset.\n\ntest : ndarray, shape (n_test_indicies,)\n Indices of the testing subset.\n\nsupports_sw : bool\n Whether or not the `estimator` supports sample weights.\n\nmethod : {'sigmoid', 'isotonic'}\n Method to use for calibration.\n\nclasses : ndarray, shape (n_classes,)\n The target classes.\n\nsample_weight : array-like, default=None\n Sample weights for `X`.\n\nReturns\n-------\ncalibrated_classifier : _CalibratedClassifier instance" - }, - { - "name": "_get_prediction_method", - "decorators": [], - "parameters": [ - { - "name": "clf", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fitted classifier to obtain the prediction method from." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return prediction method.\n\n`decision_function` method of `clf` returned, if it\nexists, otherwise `predict_proba` method returned.\n\nParameters\n----------\nclf : Estimator instance\n Fitted classifier to obtain the prediction method from.\n\nReturns\n-------\nprediction_method : callable\n The prediction method." - }, - { - "name": "_compute_predictions", - "decorators": [], - "parameters": [ - { - "name": "pred_method", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prediction method." 
- }, - { - "name": "X", - "type": "Optional[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data used to obtain predictions." - }, - { - "name": "n_classes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of classes present." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return predictions for `X` and reshape binary outputs to shape\n(n_samples, 1).\n\nParameters\n----------\npred_method : callable\n Prediction method.\n\nX : array-like or None\n Data used to obtain predictions.\n\nn_classes : int\n Number of classes present.\n\nReturns\n-------\npredictions : array-like, shape (X.shape[0], len(clf.classes_))\n The predictions. Note if there are 2 classes, array is of shape\n (X.shape[0], 1)." - }, - { - "name": "_fit_calibrator", - "decorators": [], - "parameters": [ - { - "name": "clf", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fitted classifier." - }, - { - "name": "predictions", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Raw predictions returned by the un-calibrated base classifier." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The targets." - }, - { - "name": "classes", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "All the prediction classes." - }, - { - "name": "method", - "type": "Literal['sigmoid', 'isotonic']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The method to use for calibration." 
- }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit calibrator(s) and return a `_CalibratedClassifier`\ninstance.\n\n`n_classes` (i.e. `len(clf.classes_)`) calibrators are fitted.\nHowever, if `n_classes` equals 2, one calibrator is fitted.\n\nParameters\n----------\nclf : estimator instance\n Fitted classifier.\n\npredictions : array-like, shape (n_samples, n_classes) or (n_samples, 1) when binary.\n Raw predictions returned by the un-calibrated base classifier.\n\ny : array-like, shape (n_samples,)\n The targets.\n\nclasses : ndarray, shape (n_classes,)\n All the prediction classes.\n\nmethod : {'sigmoid', 'isotonic'}\n The method to use for calibration.\n\nsample_weight : ndarray, shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\npipeline : _CalibratedClassifier instance" - }, - { - "name": "_sigmoid_calibration", - "decorators": [], - "parameters": [ - { - "name": "predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The decision function or predict proba for the samples." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The targets." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Probability Calibration with sigmoid method (Platt 2000)\n\nParameters\n----------\npredictions : ndarray of shape (n_samples,)\n The decision function or predict proba for the samples.\n\ny : ndarray of shape (n_samples,)\n The targets.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\na : float\n The slope.\n\nb : float\n The intercept.\n\nReferences\n----------\nPlatt, \"Probabilistic Outputs for Support Vector Machines\"" - }, - { - "name": "calibration_curve", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True targets." - }, - { - "name": "y_prob", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Probabilities of the positive class." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether y_prob needs to be normalized into the [0, 1] interval, i.e. is not a proper probability. If True, the smallest value in y_prob is linearly mapped onto 0 and the largest one onto 1." - }, - { - "name": "n_bins", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of bins to discretize the [0, 1] interval. A bigger number requires more data. Bins with no samples (i.e. without corresponding values in `y_prob`) will not be returned, thus the returned arrays may have less than `n_bins` values." - }, - { - "name": "strategy", - "type": "Literal['uniform', 'quantile']", - "hasDefault": true, - "default": "'uniform'", - "limitation": null, - "ignored": false, - "docstring": "Strategy used to define the widths of the bins. 
uniform The bins have identical widths. quantile The bins have the same number of samples and depend on `y_prob`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute true and predicted probabilities for a calibration curve.\n\nThe method assumes the inputs come from a binary classifier, and\ndiscretize the [0, 1] interval into bins.\n\nCalibration curves may also be referred to as reliability diagrams.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n True targets.\n\ny_prob : array-like of shape (n_samples,)\n Probabilities of the positive class.\n\nnormalize : bool, default=False\n Whether y_prob needs to be normalized into the [0, 1] interval, i.e.\n is not a proper probability. If True, the smallest value in y_prob\n is linearly mapped onto 0 and the largest one onto 1.\n\nn_bins : int, default=5\n Number of bins to discretize the [0, 1] interval. A bigger number\n requires more data. Bins with no samples (i.e. 
without\n corresponding values in `y_prob`) will not be returned, thus the\n returned arrays may have less than `n_bins` values.\n\nstrategy : {'uniform', 'quantile'}, default='uniform'\n Strategy used to define the widths of the bins.\n\n uniform\n The bins have identical widths.\n quantile\n The bins have the same number of samples and depend on `y_prob`.\n\nReturns\n-------\nprob_true : ndarray of shape (n_bins,) or smaller\n The proportion of samples whose class is the positive class, in each\n bin (fraction of positives).\n\nprob_pred : ndarray of shape (n_bins,) or smaller\n The mean predicted probability in each bin.\n\nReferences\n----------\nAlexandru Niculescu-Mizil and Rich Caruana (2005) Predicting Good\nProbabilities With Supervised Learning, in Proceedings of the 22nd\nInternational Conference on Machine Learning (ICML).\nSee section 4 (Qualitative Analysis of Predictions).\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.calibration import calibration_curve\n>>> y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1])\n>>> y_pred = np.array([0.1, 0.2, 0.3, 0.4, 0.65, 0.7, 0.8, 0.9, 1.])\n>>> prob_true, prob_pred = calibration_curve(y_true, y_pred, n_bins=3)\n>>> prob_true\narray([0. , 0.5, 1. 
])\n>>> prob_pred\narray([0.2 , 0.525, 0.85 ])" - } - ] - }, - { - "name": "sklearn.conftest", - "imports": [ - "import os", - "from os import environ", - "from functools import wraps", - "import pytest", - "from threadpoolctl import threadpool_limits", - "from sklearn.utils._openmp_helpers import _openmp_effective_n_threads", - "from sklearn.datasets import fetch_20newsgroups", - "from sklearn.datasets import fetch_20newsgroups_vectorized", - "from sklearn.datasets import fetch_california_housing", - "from sklearn.datasets import fetch_covtype", - "from sklearn.datasets import fetch_kddcup99", - "from sklearn.datasets import fetch_olivetti_faces", - "from sklearn.datasets import fetch_rcv1" - ], - "classes": [], - "functions": [ - { - "name": "_fetch_fixture", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fetch dataset (download if missing and requested by environment)." - }, - { - "name": "pytest_collection_modifyitems", - "decorators": [], - "parameters": [ - { - "name": "config", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "items", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Called after collect is completed.\n\nParameters\n----------\nconfig : pytest config\nitems : list of collected items" - }, - { - "name": "pyplot", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Setup and teardown fixture for matplotlib.\n\nThis fixture checks if we can import matplotlib. If not, the tests will be\nskipped. Otherwise, we setup matplotlib backend and close the figures\nafter running the functions.\n\nReturns\n-------\npyplot : module\n The ``matplotlib.pyplot`` module." 
- }, - { - "name": "pytest_runtest_setup", - "decorators": [], - "parameters": [ - { - "name": "item", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "item to be processed" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set the number of openmp threads based on the number of workers\nxdist is using to prevent oversubscription.\n\nParameters\n----------\nitem : pytest item\n item to be processed" - } - ] - }, - { - "name": "sklearn.discriminant_analysis", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy import linalg", - "from scipy.special import expit", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from base import ClassifierMixin", - "from linear_model._base import LinearClassifierMixin", - "from covariance import ledoit_wolf", - "from covariance import empirical_covariance", - "from covariance import shrunk_covariance", - "from utils.multiclass import unique_labels", - "from utils import check_array", - "from utils.validation import check_is_fitted", - "from utils.multiclass import check_classification_targets", - "from utils.extmath import softmax", - "from preprocessing import StandardScaler", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "LinearDiscriminantAnalysis", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "solver", - "type": "Literal['svd', 'lsqr', 'eigen']", - "hasDefault": true, - "default": "'svd'", - "limitation": null, - "ignored": false, - "docstring": "Solver to use, possible values: - 'svd': Singular value decomposition (default). Does not compute the covariance matrix, therefore this solver is recommended for data with a large number of features. - 'lsqr': Least squares solution. Can be combined with shrinkage or custom covariance estimator. 
- 'eigen': Eigenvalue decomposition. Can be combined with shrinkage or custom covariance estimator." - }, - { - "name": "shrinkage", - "type": "Union[Literal['auto'], float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Shrinkage parameter, possible values: - None: no shrinkage (default). - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. - float between 0 and 1: fixed shrinkage parameter. This should be left to None if `covariance_estimator` is used. Note that shrinkage works only with 'lsqr' and 'eigen' solvers." - }, - { - "name": "priors", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class prior probabilities. By default, the class proportions are inferred from the training data." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components (<= min(n_classes - 1, n_features)) for dimensionality reduction. If None, will be set to min(n_classes - 1, n_features). This parameter only affects the `transform` method." - }, - { - "name": "store_covariance", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, explicitely compute the weighted within-class covariance matrix when solver is 'svd'. The matrix is always computed and stored for the other solvers. .. versionadded:: 0.17" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Absolute threshold for a singular value of X to be considered significant, used to estimate the rank of X. Dimensions whose singular values are non-significant are discarded. Only used if solver is 'svd'. .. 
versionadded:: 0.17" - }, - { - "name": "covariance_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, `covariance_estimator` is used to estimate the covariance matrices instead of relying on the empirical covariance estimator (with potential shrinkage). The object should have a fit method and a ``covariance_`` attribute like the estimators in :mod:`sklearn.covariance`. if None the shrinkage parameter drives the estimate. This should be left to None if `shrinkage` is used. Note that `covariance_estimator` works only with 'lsqr' and 'eigen' solvers. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_solve_lsqr", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "shrinkage", - "type": "Optional[Union[Literal['auto'], float]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Shrinkage parameter, possible values: - None: no shrinkage. - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. - float between 0 and 1: fixed shrinkage parameter. Shrinkage parameter is ignored if `covariance_estimator` i not None" - }, - { - "name": "covariance_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, `covariance_estimator` is used to estimate the covariance matrices instead of relying the empirical covariance estimator (with potential shrinkage). 
The object should have a fit method and a ``covariance_`` attribute like the estimators in sklearn.covariance. if None the shrinkage parameter drives the estimate. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Least squares solver.\n\nThe least squares solver computes a straightforward solution of the\noptimal decision rule based directly on the discriminant functions. It\ncan only be used for classification (with any covariance estimator),\nbecause\nestimation of eigenvectors is not performed. Therefore, dimensionality\nreduction with the transform is not supported.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_classes)\n Target values.\n\nshrinkage : 'auto', float or None\n Shrinkage parameter, possible values:\n - None: no shrinkage.\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator` i\n not None\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in sklearn.covariance.\n if None the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\nNotes\n-----\nThis solver is based on [1]_, section 2.6.2, pp. 39-41.\n\nReferences\n----------\n.. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN\n 0-471-05669-3." - }, - { - "name": "_solve_eigen", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "shrinkage", - "type": "Optional[Union[Literal['auto'], float]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Shrinkage parameter, possible values: - None: no shrinkage. - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. - float between 0 and 1: fixed shrinkage constant. Shrinkage parameter is ignored if `covariance_estimator` i not None" - }, - { - "name": "covariance_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, `covariance_estimator` is used to estimate the covariance matrices instead of relying the empirical covariance estimator (with potential shrinkage). The object should have a fit method and a ``covariance_`` attribute like the estimators in sklearn.covariance. if None the shrinkage parameter drives the estimate. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Eigenvalue solver.\n\nThe eigenvalue solver computes the optimal solution of the Rayleigh\ncoefficient (basically the ratio of between class scatter to within\nclass scatter). 
This solver supports both classification and\ndimensionality reduction (with any covariance estimator).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\nshrinkage : 'auto', float or None\n Shrinkage parameter, possible values:\n - None: no shrinkage.\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage constant.\n\n Shrinkage parameter is ignored if `covariance_estimator` i\n not None\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in sklearn.covariance.\n if None the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\nNotes\n-----\nThis solver is based on [1]_, section 3.8.3, pp. 121-124.\n\nReferences\n----------\n.. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN\n 0-471-05669-3." - }, - { - "name": "_solve_svd", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "SVD solver.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values." 
- }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit LinearDiscriminantAnalysis model according to the given\n training data and parameters.\n\n .. versionchanged:: 0.19\n *store_covariance* has been moved to main constructor.\n\n .. versionchanged:: 0.19\n *tol* has been moved to main constructor.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Project data to maximize class separation.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Transformed data." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate probability.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n Estimated probabilities." 
- }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate log probability.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n Estimated log probabilities." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of samples (test vectors)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply decision function to an array of samples.\n\nThe decision function is equal (up to a constant factor) to the\nlog-posterior of the model, i.e. `log p(y = k | x)`. In a binary\nclassification setting this instead corresponds to the difference\n`log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Array of samples (test vectors).\n\nReturns\n-------\nC : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Decision function values related to each class, per sample.\n In the two-class case, the shape is (n_samples,), giving the\n log likelihood ratio of the positive class." 
- } - ], - "docstring": "Linear Discriminant Analysis\n\nA classifier with a linear decision boundary, generated by fitting class\nconditional densities to the data and using Bayes' rule.\n\nThe model fits a Gaussian density to each class, assuming that all classes\nshare the same covariance matrix.\n\nThe fitted model can also be used to reduce the dimensionality of the input\nby projecting it to the most discriminative directions, using the\n`transform` method.\n\n.. versionadded:: 0.17\n *LinearDiscriminantAnalysis*.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nsolver : {'svd', 'lsqr', 'eigen'}, default='svd'\n Solver to use, possible values:\n - 'svd': Singular value decomposition (default).\n Does not compute the covariance matrix, therefore this solver is\n recommended for data with a large number of features.\n - 'lsqr': Least squares solution.\n Can be combined with shrinkage or custom covariance estimator.\n - 'eigen': Eigenvalue decomposition.\n Can be combined with shrinkage or custom covariance estimator.\n\nshrinkage : 'auto' or float, default=None\n Shrinkage parameter, possible values:\n - None: no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n This should be left to None if `covariance_estimator` is used.\n Note that shrinkage works only with 'lsqr' and 'eigen' solvers.\n\npriors : array-like of shape (n_classes,), default=None\n The class prior probabilities. By default, the class proportions are\n inferred from the training data.\n\nn_components : int, default=None\n Number of components (<= min(n_classes - 1, n_features)) for\n dimensionality reduction. If None, will be set to\n min(n_classes - 1, n_features). This parameter only affects the\n `transform` method.\n\nstore_covariance : bool, default=False\n If True, explicitely compute the weighted within-class covariance\n matrix when solver is 'svd'. 
The matrix is always computed\n and stored for the other solvers.\n\n .. versionadded:: 0.17\n\ntol : float, default=1.0e-4\n Absolute threshold for a singular value of X to be considered\n significant, used to estimate the rank of X. Dimensions whose\n singular values are non-significant are discarded. Only used if\n solver is 'svd'.\n\n .. versionadded:: 0.17\n\ncovariance_estimator : covariance estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying on the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in :mod:`sklearn.covariance`.\n if None the shrinkage parameter drives the estimate.\n\n This should be left to None if `shrinkage` is used.\n Note that `covariance_estimator` works only with 'lsqr' and 'eigen'\n solvers.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_classes, n_features)\n Weight vector(s).\n\nintercept_ : ndarray of shape (n_classes,)\n Intercept term.\n\ncovariance_ : array-like of shape (n_features, n_features)\n Weighted within-class covariance matrix. It corresponds to\n `sum_k prior_k * C_k` where `C_k` is the covariance matrix of the\n samples in class `k`. The `C_k` are estimated using the (potentially\n shrunk) biased estimator of covariance. If solver is 'svd', only\n exists when `store_covariance` is True.\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n If ``n_components`` is not set then all components are stored and the\n sum of explained variances is equal to 1.0. 
Only available when eigen\n or svd solver is used.\n\nmeans_ : array-like of shape (n_classes, n_features)\n Class-wise means.\n\npriors_ : array-like of shape (n_classes,)\n Class priors (sum to 1).\n\nscalings_ : array-like of shape (rank, n_classes - 1)\n Scaling of the features in the space spanned by the class centroids.\n Only available for 'svd' and 'eigen' solvers.\n\nxbar_ : array-like of shape (n_features,)\n Overall mean. Only present if solver is 'svd'.\n\nclasses_ : array-like of shape (n_classes,)\n Unique class labels.\n\nSee Also\n--------\nQuadraticDiscriminantAnalysis : Quadratic Discriminant Analysis.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = LinearDiscriminantAnalysis()\n>>> clf.fit(X, y)\nLinearDiscriminantAnalysis()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]" - }, - { - "name": "QuadraticDiscriminantAnalysis", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "priors", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Class priors. By default, the class proportions are inferred from the training data." - }, - { - "name": "reg_param", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Regularizes the per-class covariance estimates by transforming S2 as ``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``, where S2 corresponds to the `scaling_` attribute of a given class." 
- }, - { - "name": "store_covariance", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the class covariance matrices are explicitely computed and stored in the `self.covariance_` attribute. .. versionadded:: 0.17" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Absolute threshold for a singular value to be considered significant, used to estimate the rank of `Xk` where `Xk` is the centered matrix of samples in class k. This parameter does not affect the predictions. It only controls a warning that is raised when features are considered to be colinear. .. versionadded:: 0.17" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values (integers)" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model according to the given training data and parameters.\n\n .. versionchanged:: 0.19\n ``store_covariances`` has been moved to main constructor as\n ``store_covariance``\n\n .. 
versionchanged:: 0.19\n ``tol`` has been moved to main constructor.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values (integers)" - }, - { - "name": "_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of samples (test vectors)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply decision function to an array of samples.\n\nThe decision function is equal (up to a constant factor) to the\nlog-posterior of the model, i.e. `log p(y = k | x)`. In a binary\nclassification setting this instead corresponds to the difference\n`log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Array of samples (test vectors).\n\nReturns\n-------\nC : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Decision function values related to each class, per sample.\n In the two-class case, the shape is (n_samples,), giving the\n log likelihood ratio of the positive class." 
- }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on an array of test vectors X.\n\nThe predicted class C for each sample in X is returned.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : ndarray of shape (n_samples,)" - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of samples/test vectors." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return posterior probabilities of classification.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Array of samples/test vectors.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n Posterior probabilities of classification per class." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of samples/test vectors." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return log of posterior probabilities of classification.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Array of samples/test vectors.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n Posterior log-probabilities of classification per class." 
- } - ], - "docstring": "Quadratic Discriminant Analysis\n\nA classifier with a quadratic decision boundary, generated\nby fitting class conditional densities to the data\nand using Bayes' rule.\n\nThe model fits a Gaussian density to each class.\n\n.. versionadded:: 0.17\n *QuadraticDiscriminantAnalysis*\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npriors : ndarray of shape (n_classes,), default=None\n Class priors. By default, the class proportions are inferred from the\n training data.\n\nreg_param : float, default=0.0\n Regularizes the per-class covariance estimates by transforming S2 as\n ``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``,\n where S2 corresponds to the `scaling_` attribute of a given class.\n\nstore_covariance : bool, default=False\n If True, the class covariance matrices are explicitely computed and\n stored in the `self.covariance_` attribute.\n\n .. versionadded:: 0.17\n\ntol : float, default=1.0e-4\n Absolute threshold for a singular value to be considered significant,\n used to estimate the rank of `Xk` where `Xk` is the centered matrix\n of samples in class k. This parameter does not affect the\n predictions. It only controls a warning that is raised when features\n are considered to be colinear.\n\n .. versionadded:: 0.17\n\nAttributes\n----------\ncovariance_ : list of len n_classes of ndarray of shape (n_features, n_features)\n For each class, gives the covariance matrix estimated using the\n samples of that class. The estimations are unbiased. Only present if\n `store_covariance` is True.\n\nmeans_ : array-like of shape (n_classes, n_features)\n Class-wise means.\n\npriors_ : array-like of shape (n_classes,)\n Class priors (sum to 1).\n\nrotations_ : list of len n_classes of ndarray of shape (n_features, n_k)\n For each class k an array of shape (n_features, n_k), where\n ``n_k = min(n_features, number of elements in class k)``\n It is the rotation of the Gaussian distribution, i.e. 
its\n principal axis. It corresponds to `V`, the matrix of eigenvectors\n coming from the SVD of `Xk = U S Vt` where `Xk` is the centered\n matrix of samples from class k.\n\nscalings_ : list of len n_classes of ndarray of shape (n_k,)\n For each class, contains the scaling of\n the Gaussian distributions along its principal axes, i.e. the\n variance in the rotated coordinate system. It corresponds to `S^2 /\n (n_samples - 1)`, where `S` is the diagonal matrix of singular values\n from the SVD of `Xk`, where `Xk` is the centered matrix of samples\n from class k.\n\nclasses_ : ndarray of shape (n_classes,)\n Unique class labels.\n\nExamples\n--------\n>>> from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = QuadraticDiscriminantAnalysis()\n>>> clf.fit(X, y)\nQuadraticDiscriminantAnalysis()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n\nSee Also\n--------\nLinearDiscriminantAnalysis : Linear Discriminant Analysis." 
- } - ], - "functions": [ - { - "name": "_cov", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate covariance matrix (using optional covariance_estimator).\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nshrinkage : {'empirical', 'auto'} or float, default=None\n Shrinkage parameter, possible values:\n - None or 'empirical': no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator`\n is not None.\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying on the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in :mod:`sklearn.covariance``.\n if None the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ns : ndarray of shape (n_features, n_features)\n Estimated covariance matrix." - }, - { - "name": "_class_means", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute class means.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\nReturns\n-------\nmeans : array-like of shape (n_classes, n_features)\n Class means." 
- }, - { - "name": "_class_cov", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "priors", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Class priors." - }, - { - "name": "shrinkage", - "type": "Union[Literal['auto'], float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Shrinkage parameter, possible values: - None: no shrinkage (default). - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. - float between 0 and 1: fixed shrinkage parameter. Shrinkage parameter is ignored if `covariance_estimator` is not None." - }, - { - "name": "covariance_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, `covariance_estimator` is used to estimate the covariance matrices instead of relying the empirical covariance estimator (with potential shrinkage). The object should have a fit method and a ``covariance_`` attribute like the estimators in sklearn.covariance. If None, the shrinkage parameter drives the estimate. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute weighted within-class covariance matrix.\n\nThe per-class covariance are weighted by the class priors.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\npriors : array-like of shape (n_classes,)\n Class priors.\n\nshrinkage : 'auto' or float, default=None\n Shrinkage parameter, possible values:\n - None: no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator` is not None.\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in sklearn.covariance.\n If None, the shrinkage parameter drives the estimate.\n\n .. 
versionadded:: 0.24\n\nReturns\n-------\ncov : array-like of shape (n_features, n_features)\n Weighted within-class covariance matrix" - } - ] - }, - { - "name": "sklearn.dummy", - "imports": [ - "import warnings", - "import numpy as np", - "import scipy.sparse as sp", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import MultiOutputMixin", - "from utils import check_random_state", - "from utils.validation import _num_samples", - "from utils.validation import check_array", - "from utils.validation import check_consistent_length", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.random import _random_choice_csc", - "from utils.stats import _weighted_percentile", - "from utils.multiclass import class_distribution", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "DummyClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "strategy", - "type": "Literal[\"stratified\", \"most_frequent\", \"prior\", \"uniform\", \"constant\"]", - "hasDefault": true, - "default": "\"prior\"", - "limitation": null, - "ignored": false, - "docstring": "Strategy to use to generate predictions. * \"stratified\": generates predictions by respecting the training set's class distribution. * \"most_frequent\": always predicts the most frequent label in the training set. * \"prior\": always predicts the class that maximizes the class prior (like \"most_frequent\") and ``predict_proba`` returns the class prior. * \"uniform\": generates predictions uniformly at random. * \"constant\": always predicts a constant label that is provided by the user. This is useful for metrics that evaluate a non-majority class .. versionchanged:: 0.24 The default value of `strategy` has changed to \"prior\" in version 0.24." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness to generate the predictions when ``strategy='stratified'`` or ``strategy='uniform'``. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "constant", - "type": "Union[str, int, ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The explicit constant as predicted by the \"constant\" strategy. This parameter is useful only for the \"constant\" strategy." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the random classifier.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nself : object" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test data.\n\nReturns\n-------\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Predicted target values for X." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return probability estimates for the test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test data.\n\nReturns\n-------\nP : ndarray of shape (n_samples, n_classes) or list of such arrays\n Returns the probability of the sample for each class in\n the model, where classes are ordered arithmetically, for each\n output." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, requires length = n_samples" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return log probability estimates for the test vectors X.\n\nParameters\n----------\nX : {array-like, object with finite length or shape}\n Training data, requires length = n_samples\n\nReturns\n-------\nP : ndarray of shape (n_samples, n_classes) or list of such arrays\n Returns the log probability of the sample for each class in\n the model, where classes are ordered arithmetically for each\n output." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Optional[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples. Passing None as test samples gives the same result as passing real test samples, since DummyClassifier operates independently of the sampled observations." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels for X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the mean accuracy on the given test data and labels.\n\nIn multi-label classification, this is the subset accuracy\nwhich is a harsh metric since you require for each sample that\neach label set be correctly predicted.\n\nParameters\n----------\nX : None or array-like of shape (n_samples, n_features)\n Test samples. Passing None as test samples gives the same result\n as passing real test samples, since DummyClassifier\n operates independently of the sampled observations.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Mean accuracy of self.predict(X) wrt. y." - } - ], - "docstring": "DummyClassifier is a classifier that makes predictions using simple rules.\n\nThis classifier is useful as a simple baseline to compare with other\n(real) classifiers. Do not use it for real problems.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.13\n\nParameters\n----------\nstrategy : {\"stratified\", \"most_frequent\", \"prior\", \"uniform\", \"constant\"}, default=\"prior\"\n Strategy to use to generate predictions.\n\n * \"stratified\": generates predictions by respecting the training\n set's class distribution.\n * \"most_frequent\": always predicts the most frequent label in the\n training set.\n * \"prior\": always predicts the class that maximizes the class prior\n (like \"most_frequent\") and ``predict_proba`` returns the class prior.\n * \"uniform\": generates predictions uniformly at random.\n * \"constant\": always predicts a constant label that is provided by\n the user. This is useful for metrics that evaluate a non-majority\n class\n\n .. versionchanged:: 0.24\n The default value of `strategy` has changed to \"prior\" in version\n 0.24.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness to generate the predictions when\n ``strategy='stratified'`` or ``strategy='uniform'``.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nconstant : int or str or array-like of shape (n_outputs,)\n The explicit constant as predicted by the \"constant\" strategy. 
This\n parameter is useful only for the \"constant\" strategy.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,) or list of such arrays\n Class labels for each output.\n\nn_classes_ : int or list of int\n Number of label for each output.\n\nclass_prior_ : ndarray of shape (n_classes,) or list of such arrays\n Probability of each class for each output.\n\nn_outputs_ : int\n Number of outputs.\n\nsparse_output_ : bool\n True if the array returned from predict is to be in sparse CSC format.\n Is automatically set to True if the input y is passed in sparse format.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.dummy import DummyClassifier\n>>> X = np.array([-1, 1, 1, 1])\n>>> y = np.array([0, 1, 1, 1])\n>>> dummy_clf = DummyClassifier(strategy=\"most_frequent\")\n>>> dummy_clf.fit(X, y)\nDummyClassifier(strategy='most_frequent')\n>>> dummy_clf.predict(X)\narray([1, 1, 1, 1])\n>>> dummy_clf.score(X, y)\n0.75" - }, - { - "name": "DummyRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "strategy", - "type": "Literal[\"mean\", \"median\", \"quantile\", \"constant\"]", - "hasDefault": true, - "default": "\"mean\"", - "limitation": null, - "ignored": false, - "docstring": "Strategy to use to generate predictions. * \"mean\": always predicts the mean of the training set * \"median\": always predicts the median of the training set * \"quantile\": always predicts a specified quantile of the training set, provided with the quantile parameter. * \"constant\": always predicts a constant value that is provided by the user." - }, - { - "name": "constant", - "type": "Union[float, int, ArrayLike]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The explicit constant as predicted by the \"constant\" strategy. This parameter is useful only for the \"constant\" strategy." 
- }, - { - "name": "quantile", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The quantile to predict using the \"quantile\" strategy. A quantile of 0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the maximum." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the random regressor.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nself : object" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data." - }, - { - "name": "return_std", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the standard deviation of posterior prediction. All zeros in this case. .. 
versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test data.\n\nreturn_std : bool, default=False\n Whether to return the standard deviation of posterior prediction.\n All zeros in this case.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Predicted target values for X.\n\ny_std : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Standard deviation of predictive distribution of query points." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Optional[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples. Passing None as test samples gives the same result as passing real test samples, since DummyRegressor operates independently of the sampled observations." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True values for X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the coefficient of determination R^2 of the prediction.\n\nThe coefficient R^2 is defined as (1 - u/v), where u is the residual\nsum of squares ((y_true - y_pred) ** 2).sum() and v is the total\nsum of squares ((y_true - y_true.mean()) ** 2).sum().\nThe best possible score is 1.0 and it can be negative (because the\nmodel can be arbitrarily worse). 
A constant model that always\npredicts the expected value of y, disregarding the input features,\nwould get a R^2 score of 0.0.\n\nParameters\n----------\nX : None or array-like of shape (n_samples, n_features)\n Test samples. Passing None as test samples gives the same result\n as passing real test samples, since DummyRegressor\n operates independently of the sampled observations.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True values for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n R^2 of self.predict(X) wrt. y." - } - ], - "docstring": "DummyRegressor is a regressor that makes predictions using\nsimple rules.\n\nThis regressor is useful as a simple baseline to compare with other\n(real) regressors. Do not use it for real problems.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nstrategy : {\"mean\", \"median\", \"quantile\", \"constant\"}, default=\"mean\"\n Strategy to use to generate predictions.\n\n * \"mean\": always predicts the mean of the training set\n * \"median\": always predicts the median of the training set\n * \"quantile\": always predicts a specified quantile of the training set,\n provided with the quantile parameter.\n * \"constant\": always predicts a constant value that is provided by\n the user.\n\nconstant : int or float or array-like of shape (n_outputs,), default=None\n The explicit constant as predicted by the \"constant\" strategy. This\n parameter is useful only for the \"constant\" strategy.\n\nquantile : float in [0.0, 1.0], default=None\n The quantile to predict using the \"quantile\" strategy. 
A quantile of\n 0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the\n maximum.\n\nAttributes\n----------\nconstant_ : ndarray of shape (1, n_outputs)\n Mean or median or quantile of the training targets or constant value\n given by the user.\n\nn_outputs_ : int\n Number of outputs.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.dummy import DummyRegressor\n>>> X = np.array([1.0, 2.0, 3.0, 4.0])\n>>> y = np.array([2.0, 3.0, 5.0, 10.0])\n>>> dummy_regr = DummyRegressor(strategy=\"mean\")\n>>> dummy_regr.fit(X, y)\nDummyRegressor()\n>>> dummy_regr.predict(X)\narray([5., 5., 5., 5.])\n>>> dummy_regr.score(X, y)\n0.0" - } - ], - "functions": [] - }, - { - "name": "sklearn.exceptions", - "imports": [ - "from utils.deprecation import deprecated" - ], - "classes": [ - { - "name": "NotFittedError", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Exception class to raise if estimator is used before fitting.\n\nThis class inherits from both ValueError and AttributeError to help with\nexception handling and backward compatibility.\n\nExamples\n--------\n>>> from sklearn.svm import LinearSVC\n>>> from sklearn.exceptions import NotFittedError\n>>> try:\n... LinearSVC().predict([[1, 2], [2, 3], [3, 4]])\n... except NotFittedError as e:\n... print(repr(e))\nNotFittedError(\"This LinearSVC instance is not fitted yet. Call 'fit' with\nappropriate arguments before using this estimator.\"...)\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.validation." 
- }, - { - "name": "ChangedBehaviorWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Warning class used to notify the user of any change in the behavior.\n\n.. versionchanged:: 0.18\n Moved from sklearn.base." - }, - { - "name": "ConvergenceWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Custom warning to capture convergence problems\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils." - }, - { - "name": "DataConversionWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Warning used to notify implicit data conversions happening in the code.\n\nThis warning occurs when some input data needs to be converted or\ninterpreted in a way that may not match the user's expectations.\n\nFor example, this warning may occur when the user\n - passes an integer array to a function which expects float input and\n will convert the input\n - requests a non-copying operation, but a copy is required to meet the\n implementation's data-type expectations;\n - passes an input whose shape can be interpreted ambiguously.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.validation." 
- }, - { - "name": "DataDimensionalityWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Custom warning to notify potential issues with data dimensionality.\n\nFor example, in random projection, this warning is raised when the\nnumber of components, which quantifies the dimensionality of the target\nprojection space, is higher than the number of features, which quantifies\nthe dimensionality of the original source space, to imply that the\ndimensionality of the problem will not be reduced.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils." - }, - { - "name": "EfficiencyWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Warning used to notify the user of inefficient computation.\n\nThis warning notifies the user that the efficiency may not be optimal due\nto some reason which may be included as a part of the warning message.\nThis may be subclassed into a more specific Warning class.\n\n.. versionadded:: 0.18" - }, - { - "name": "FitFailedWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Warning class used if there is an error while fitting the estimator.\n\nThis Warning is used in meta estimators GridSearchCV and RandomizedSearchCV\nand the cross-validation helper function cross_val_score to warn when there\nis an error while fitting the estimator.\n\n.. versionchanged:: 0.18\n Moved from sklearn.cross_validation." 
- }, - { - "name": "NonBLASDotWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Warning used when the dot operation does not use BLAS.\n\nThis warning is used to notify the user that BLAS was not used for dot\noperation and hence the efficiency may be affected.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.validation, extends EfficiencyWarning." - }, - { - "name": "SkipTestWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Warning class used to notify the user of a test that was skipped.\n\nFor example, one of the estimator checks requires a pandas import.\nIf the pandas package cannot be imported, the test will be skipped rather\nthan register as a failure." - }, - { - "name": "UndefinedMetricWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Warning used when the metric is invalid\n\n.. versionchanged:: 0.18\n Moved from sklearn.base." 
- }, - { - "name": "PositiveSpectrumWarning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Warning raised when the eigenvalues of a PSD matrix have issues\n\nThis warning is typically raised by ``_check_psd_eigenvalues`` when the\neigenvalues of a positive semidefinite (PSD) matrix such as a gram matrix\n(kernel) present significant negative eigenvalues, or bad conditioning i.e.\nvery small non-zero eigenvalues compared to the largest eigenvalue.\n\n.. versionadded:: 0.22" - } - ], - "functions": [] - }, - { - "name": "sklearn.isotonic", - "imports": [ - "import numpy as np", - "from scipy import interpolate", - "from scipy.stats import spearmanr", - "import warnings", - "import math", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from base import RegressorMixin", - "from utils import check_array", - "from utils import check_consistent_length", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from _isotonic import _inplace_contiguous_isotonic_regression", - "from _isotonic import _make_unique" - ], - "classes": [ - { - "name": "IsotonicRegression", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "y_min", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Lower bound on the lowest predicted value (the minimum value may still be higher). If not set, defaults to -inf." - }, - { - "name": "y_max", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Upper bound on the highest predicted value (the maximum may still be lower). If not set, defaults to +inf." 
- }, - { - "name": "increasing", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the predictions should be constrained to increase or decrease with `X`. 'auto' will decide based on the Spearman correlation estimate's sign." - }, - { - "name": "out_of_bounds", - "type": "Literal['nan', 'clip', 'raise']", - "hasDefault": true, - "default": "'nan'", - "limitation": null, - "ignored": false, - "docstring": "Handles how `X` values outside of the training domain are handled during prediction. - 'nan', predictions will be NaN. - 'clip', predictions will be set to the value corresponding to the nearest train interval endpoint. - 'raise', a `ValueError` is raised." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_input_data_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_build_f", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Build the f_ interp1d function." - }, - { - "name": "_build_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Build the y_ IsotonicRegression." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. .. versionchanged:: 0.24 Also accepts 2d array with 1 feature." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training target." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights. 
If set to None, all weights will be set to 1 (equal weights)." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples,) or (n_samples, 1)\n Training data.\n\n .. versionchanged:: 0.24\n Also accepts 2d array with 1 feature.\n\ny : array-like of shape (n_samples,)\n Training target.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights. If set to None, all weights will be set to 1 (equal\n weights).\n\nReturns\n-------\nself : object\n Returns an instance of self.\n\nNotes\n-----\nX is stored for future use, as :meth:`transform` needs X to interpolate\nnew input data." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "T", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to transform. .. versionchanged:: 0.24 Also accepts 2d array with 1 feature." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform new data by linear interpolation\n\nParameters\n----------\nT : array-like of shape (n_samples,) or (n_samples, 1)\n Data to transform.\n\n .. versionchanged:: 0.24\n Also accepts 2d array with 1 feature.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n The transformed data" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "T", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to transform." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict new data by linear interpolation.\n\nParameters\n----------\nT : array-like of shape (n_samples,) or (n_samples, 1)\n Data to transform.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n Transformed data." 
- }, - { - "name": "__getstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Pickle-protocol - return state of the estimator. " - }, - { - "name": "__setstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Pickle-protocol - set state of the estimator.\n\nWe need to rebuild the interpolation function." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Isotonic regression model.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\ny_min : float, default=None\n Lower bound on the lowest predicted value (the minimum value may\n still be higher). If not set, defaults to -inf.\n\ny_max : float, default=None\n Upper bound on the highest predicted value (the maximum may still be\n lower). If not set, defaults to +inf.\n\nincreasing : bool or 'auto', default=True\n Determines whether the predictions should be constrained to increase\n or decrease with `X`. 'auto' will decide based on the Spearman\n correlation estimate's sign.\n\nout_of_bounds : {'nan', 'clip', 'raise'}, default='nan'\n Handles how `X` values outside of the training domain are handled\n during prediction.\n\n - 'nan', predictions will be NaN.\n - 'clip', predictions will be set to the value corresponding to\n the nearest train interval endpoint.\n - 'raise', a `ValueError` is raised.\n\nAttributes\n----------\nX_min_ : float\n Minimum value of input array `X_` for left bound.\n\nX_max_ : float\n Maximum value of input array `X_` for right bound.\n\nX_thresholds_ : ndarray of shape (n_thresholds,)\n Unique ascending `X` values used to interpolate\n the y = f(X) monotonic function.\n\n .. 
versionadded:: 0.24\n\ny_thresholds_ : ndarray of shape (n_thresholds,)\n De-duplicated `y` values suitable to interpolate the y = f(X)\n monotonic function.\n\n .. versionadded:: 0.24\n\nf_ : function\n The stepwise interpolating function that covers the input domain ``X``.\n\nincreasing_ : bool\n Inferred value for ``increasing``.\n\nNotes\n-----\nTies are broken using the secondary method from de Leeuw, 1977.\n\nReferences\n----------\nIsotonic Median Regression: A Linear Programming Approach\nNilotpal Chakravarti\nMathematics of Operations Research\nVol. 14, No. 2 (May, 1989), pp. 303-308\n\nIsotone Optimization in R : Pool-Adjacent-Violators\nAlgorithm (PAVA) and Active Set Methods\nde Leeuw, Hornik, Mair\nJournal of Statistical Software 2009\n\nCorrectness of Kruskal's algorithms for monotone regression with ties\nde Leeuw, Psychometrica, 1977\n\nExamples\n--------\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.isotonic import IsotonicRegression\n>>> X, y = make_regression(n_samples=10, n_features=1, random_state=41)\n>>> iso_reg = IsotonicRegression().fit(X, y)\n>>> iso_reg.predict([.1, .2])\narray([1.8628..., 3.7256...])" - } - ], - "functions": [ - { - "name": "check_increasing", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training target." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Determine whether y is monotonically correlated with x.\n\ny is found increasing or decreasing with respect to x based on a Spearman\ncorrelation test.\n\nParameters\n----------\nx : array-like of shape (n_samples,)\n Training data.\n\ny : array-like of shape (n_samples,)\n Training target.\n\nReturns\n-------\nincreasing_bool : boolean\n Whether the relationship is increasing or decreasing.\n\nNotes\n-----\nThe Spearman correlation coefficient is estimated from the data, and the\nsign of the resulting estimate is used as the result.\n\nIn the event that the 95% confidence interval based on Fisher transform\nspans zero, a warning is raised.\n\nReferences\n----------\nFisher transformation. Wikipedia.\nhttps://en.wikipedia.org/wiki/Fisher_transformation" - }, - { - "name": "isotonic_regression", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights on each point of the regression. If None, weight is set to 1 (equal weights)." - }, - { - "name": "y_min", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Lower bound on the lowest predicted value (the minimum value may still be higher). If not set, defaults to -inf." - }, - { - "name": "y_max", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Upper bound on the highest predicted value (the maximum may still be lower). If not set, defaults to +inf." 
- }, - { - "name": "increasing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to compute ``y_`` is increasing (if set to True) or decreasing (if set to False)" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Solve the isotonic regression model.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny : array-like of shape (n_samples,)\n The data.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights on each point of the regression.\n If None, weight is set to 1 (equal weights).\n\ny_min : float, default=None\n Lower bound on the lowest predicted value (the minimum value may\n still be higher). If not set, defaults to -inf.\n\ny_max : float, default=None\n Upper bound on the highest predicted value (the maximum may still be\n lower). If not set, defaults to +inf.\n\nincreasing : bool, default=True\n Whether to compute ``y_`` is increasing (if set to True) or decreasing\n (if set to False)\n\nReturns\n-------\ny_ : list of floats\n Isotonic fit of y.\n\nReferences\n----------\n\"Active set algorithms for isotonic regression; A unifying framework\"\nby Michael J. Best and Nilotpal Chakravarti, section 3." 
- } - ] - }, - { - "name": "sklearn.kernel_approximation", - "imports": [ - "import warnings", - "import numpy as np", - "import scipy.sparse as sp", - "from scipy.linalg import svd", - "from scipy.fft import fft", - "from scipy.fft import ifft", - "from scipy.fftpack import fft", - "from scipy.fftpack import ifft", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_random_state", - "from utils import as_float_array", - "from utils.extmath import safe_sparse_dot", - "from utils.validation import check_is_fitted", - "from metrics.pairwise import pairwise_kernels", - "from metrics.pairwise import KERNEL_PARAMS", - "from utils.validation import check_non_negative", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "PolynomialCountSketch", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Parameter of the polynomial kernel whose feature map will be approximated." - }, - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel whose feature map will be approximated." - }, - { - "name": "coef0", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Constant term of the polynomial kernel whose feature map will be approximated." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the output feature space. Usually, n_components should be greater than the number of features in input samples in order to achieve good performance. 
The optimal score / run time balance is typically achieved around n_components = 10 * n_features, but this depends on the specific dataset being used." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for indexHash and bitHash initialization. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples in the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model with X.\n\nInitializes the internal variables. The method needs no information\nabout the distribution of data, so we only care about n_features in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data, where n_samples in the number of samples and n_features is the number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate the feature map approximation for X.\n\nParameters\n----------\nX : {array-like}, shape (n_samples, n_features)\n New data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)" - } - ], - "docstring": "Polynomial kernel approximation via Tensor Sketch.\n\nImplements Tensor Sketch, which approximates the feature map\nof the polynomial kernel::\n\n K(X, Y) = (gamma * + coef0)^degree\n\nby efficiently computing a Count Sketch of the outer product of a\nvector with itself using Fast Fourier Transforms (FFT). Read more in the\n:ref:`User Guide `.\n\n.. versionadded:: 0.24\n\nParameters\n----------\ngamma : float, default=1.0\n Parameter of the polynomial kernel whose feature map\n will be approximated.\n\ndegree : int, default=2\n Degree of the polynomial kernel whose feature map\n will be approximated.\n\ncoef0 : int, default=0\n Constant term of the polynomial kernel whose feature map\n will be approximated.\n\nn_components : int, default=100\n Dimensionality of the output feature space. Usually, n_components\n should be greater than the number of features in input samples in\n order to achieve good performance. The optimal score / run time\n balance is typically achieved around n_components = 10 * n_features,\n but this depends on the specific dataset being used.\n\nrandom_state : int, RandomState instance, default=None\n Determines random number generation for indexHash and bitHash\n initialization. Pass an int for reproducible results across multiple\n function calls. 
See :term:`Glossary `.\n\nAttributes\n----------\nindexHash_ : ndarray of shape (degree, n_features), dtype=int64\n Array of indexes in range [0, n_components) used to represent\n the 2-wise independent hash functions for Count Sketch computation.\n\nbitHash_ : ndarray of shape (degree, n_features), dtype=float32\n Array with random entries in {+1, -1}, used to represent\n the 2-wise independent hash functions for Count Sketch computation.\n\nExamples\n--------\n>>> from sklearn.kernel_approximation import PolynomialCountSketch\n>>> from sklearn.linear_model import SGDClassifier\n>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n>>> y = [0, 0, 1, 1]\n>>> ps = PolynomialCountSketch(degree=3, random_state=1)\n>>> X_features = ps.fit_transform(X)\n>>> clf = SGDClassifier(max_iter=10, tol=1e-3)\n>>> clf.fit(X_features, y)\nSGDClassifier(max_iter=10)\n>>> clf.score(X_features, y)\n1.0" - }, - { - "name": "RBFSampler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Parameter of RBF kernel: exp(-gamma * x^2)" - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of Monte Carlo samples per original feature. Equals the dimensionality of the computed feature space." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the generation of the random weights and random offset when fitting the training data. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples in the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model with X.\n\nSamples random projection according to n_features.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data, where n_samples in the number of samples and n_features is the number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the approximate feature map to X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n New data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)" - } - ], - "docstring": "Approximates feature map of an RBF kernel by Monte Carlo approximation\nof its Fourier transform.\n\nIt implements a variant of Random Kitchen Sinks.[1]\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ngamma : float, default=1.0\n Parameter of RBF kernel: exp(-gamma * x^2)\n\nn_components : int, default=100\n Number of Monte Carlo samples per original feature.\n Equals the dimensionality of the computed feature space.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the generation of the random\n weights and random offset when fitting the training data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nrandom_offset_ : ndarray of shape (n_components,), dtype=float64\n Random offset used to compute the projection in the `n_components`\n dimensions of the feature space.\n\nrandom_weights_ : ndarray of shape (n_features, n_components), dtype=float64\n Random projection directions drawn from the Fourier transform\n of the RBF kernel.\n\n\nExamples\n--------\n>>> from sklearn.kernel_approximation import RBFSampler\n>>> from sklearn.linear_model import SGDClassifier\n>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n>>> y = [0, 0, 1, 1]\n>>> rbf_feature = RBFSampler(gamma=1, random_state=1)\n>>> X_features = rbf_feature.fit_transform(X)\n>>> clf = SGDClassifier(max_iter=5, tol=1e-3)\n>>> clf.fit(X_features, y)\nSGDClassifier(max_iter=5)\n>>> clf.score(X_features, y)\n1.0\n\nNotes\n-----\nSee \"Random Features for Large-Scale Kernel 
Machines\" by A. Rahimi and\nBenjamin Recht.\n\n[1] \"Weighted Sums of Random Kitchen Sinks: Replacing\nminimization with randomization in learning\" by A. Rahimi and\nBenjamin Recht.\n(https://people.eecs.berkeley.edu/~brecht/papers/08.rah.rec.nips.pdf)" - }, - { - "name": "SkewedChi2Sampler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "skewedness", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "\"skewedness\" parameter of the kernel. Needs to be cross-validated." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "number of Monte Carlo samples per original feature. Equals the dimensionality of the computed feature space." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the generation of the random weights and random offset when fitting the training data. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples in the number of samples and n_features is the number of features." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model with X.\n\nSamples random projection according to n_features.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data, where n_samples in the number of samples and n_features is the number of features. All values of X must be strictly greater than \"-skewedness\"." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the approximate feature map to X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n New data, where n_samples in the number of samples\n and n_features is the number of features. All values of X must be\n strictly greater than \"-skewedness\".\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)" - } - ], - "docstring": "Approximates feature map of the \"skewed chi-squared\" kernel by Monte\nCarlo approximation of its Fourier transform.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nskewedness : float, default=1.0\n \"skewedness\" parameter of the kernel. 
Needs to be cross-validated.\n\nn_components : int, default=100\n number of Monte Carlo samples per original feature.\n Equals the dimensionality of the computed feature space.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the generation of the random\n weights and random offset when fitting the training data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nrandom_weights_ : ndarray of shape (n_features, n_components)\n Weight array, sampled from a secant hyperbolic distribution, which will\n be used to linearly transform the log of the data.\n\nrandom_offset_ : ndarray of shape (n_features, n_components)\n Bias term, which will be added to the data. It is uniformly distributed\n between 0 and 2*pi.\n\nExamples\n--------\n>>> from sklearn.kernel_approximation import SkewedChi2Sampler\n>>> from sklearn.linear_model import SGDClassifier\n>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n>>> y = [0, 0, 1, 1]\n>>> chi2_feature = SkewedChi2Sampler(skewedness=.01,\n... n_components=10,\n... random_state=0)\n>>> X_features = chi2_feature.fit_transform(X, y)\n>>> clf = SGDClassifier(max_iter=10, tol=1e-3)\n>>> clf.fit(X_features, y)\nSGDClassifier(max_iter=10)\n>>> clf.score(X_features, y)\n1.0\n\nReferences\n----------\nSee \"Random Fourier Approximations for Skewed Multiplicative Histogram\nKernels\" by Fuxin Li, Catalin Ionescu and Cristian Sminchisescu.\n\nSee Also\n--------\nAdditiveChi2Sampler : A different approach for approximating an additive\n variant of the chi squared kernel.\n\nsklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel." 
- }, - { - "name": "AdditiveChi2Sampler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "sample_steps", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Gives the number of (complex) sampling points." - }, - { - "name": "sample_interval", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sampling interval. Must be specified when sample_steps not in {1,2,3}." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples in the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Set the parameters\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply approximate feature map to X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n\nReturns\n-------\nX_new : {ndarray, sparse matrix}, shape = (n_samples, n_features * (2*sample_steps + 1))\n Whether the return value is an array of sparse matrix depends on\n the type of the input X." 
- }, - { - "name": "_transform_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_transform_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Approximate feature map for additive chi2 kernel.\n\nUses sampling the fourier transform of the kernel characteristic\nat regular intervals.\n\nSince the kernel that is to be approximated is additive, the components of\nthe input vectors can be treated separately. Each entry in the original\nspace is transformed into 2*sample_steps+1 features, where sample_steps is\na parameter of the method. Typical values of sample_steps include 1, 2 and\n3.\n\nOptimal choices for the sampling interval for certain data ranges can be\ncomputed (see the reference). The default values should be reasonable.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nsample_steps : int, default=2\n Gives the number of (complex) sampling points.\nsample_interval : float, default=None\n Sampling interval. Must be specified when sample_steps not in {1,2,3}.\n\nAttributes\n----------\nsample_interval_ : float\n Stored sampling interval. 
Specified as a parameter if sample_steps not\n in {1,2,3}.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.linear_model import SGDClassifier\n>>> from sklearn.kernel_approximation import AdditiveChi2Sampler\n>>> X, y = load_digits(return_X_y=True)\n>>> chi2sampler = AdditiveChi2Sampler(sample_steps=2)\n>>> X_transformed = chi2sampler.fit_transform(X, y)\n>>> clf = SGDClassifier(max_iter=5, random_state=0, tol=1e-3)\n>>> clf.fit(X_transformed, y)\nSGDClassifier(max_iter=5, random_state=0)\n>>> clf.score(X_transformed, y)\n0.9499...\n\nNotes\n-----\nThis estimator approximates a slightly different version of the additive\nchi squared kernel then ``metric.additive_chi2`` computes.\n\nSee Also\n--------\nSkewedChi2Sampler : A Fourier-approximation to a non-additive variant of\n the chi squared kernel.\n\nsklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.\n\nsklearn.metrics.pairwise.additive_chi2_kernel : The exact additive chi\n squared kernel.\n\nReferences\n----------\nSee `\"Efficient additive kernels via explicit feature maps\"\n`_\nA. Vedaldi and A. Zisserman, Pattern Analysis and Machine Intelligence,\n2011" - }, - { - "name": "Nystroem", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "Kernel map to be approximated. A callable should accept two arguments and the keyword arguments passed to this object as kernel_params, and should return a floating point number." - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gamma parameter for the RBF, laplacian, polynomial, exponential chi2 and sigmoid kernels. 
Interpretation of the default value is left to the kernel; see the documentation for sklearn.metrics.pairwise. Ignored by other kernels." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Zero coefficient for polynomial and sigmoid kernels. Ignored by other kernels." - }, - { - "name": "degree", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel. Ignored by other kernels." - }, - { - "name": "kernel_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional parameters (keyword arguments) for kernel function passed as callable object." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of features to construct. How many data points will be used to construct the mapping." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the uniform sampling without replacement of n_components of the training data to construct the basis kernel. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This works by breaking down the kernel matrix into n_jobs even slices and computing them in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit estimator to data.\n\nSamples a subset of training points, computes kernel\non these and computes normalization matrix.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to transform." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply feature map to X.\n\nComputes an approximate feature map using the kernel\nbetween some training points and X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to transform.\n\nReturns\n-------\nX_transformed : ndarray of shape (n_samples, n_components)\n Transformed data." - }, - { - "name": "_get_kernel_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Approximate a kernel map using a subset of the training data.\n\nConstructs an approximate feature map for an arbitrary kernel\nusing a subset of the data as basis.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nkernel : string or callable, default='rbf'\n Kernel map to be approximated. 
A callable should accept two arguments\n and the keyword arguments passed to this object as kernel_params, and\n should return a floating point number.\n\ngamma : float, default=None\n Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\n and sigmoid kernels. Interpretation of the default value is left to\n the kernel; see the documentation for sklearn.metrics.pairwise.\n Ignored by other kernels.\n\ncoef0 : float, default=None\n Zero coefficient for polynomial and sigmoid kernels.\n Ignored by other kernels.\n\ndegree : float, default=None\n Degree of the polynomial kernel. Ignored by other kernels.\n\nkernel_params : dict, default=None\n Additional parameters (keyword arguments) for kernel function passed\n as callable object.\n\nn_components : int, default=100\n Number of features to construct.\n How many data points will be used to construct the mapping.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the uniform sampling without\n replacement of n_components of the training data to construct the basis\n kernel.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the kernel matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. 
versionadded:: 0.24\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Subset of training points used to construct the feature map.\n\ncomponent_indices_ : ndarray of shape (n_components)\n Indices of ``components_`` in the training set.\n\nnormalization_ : ndarray of shape (n_components, n_components)\n Normalization matrix needed for embedding.\n Square root of the kernel matrix on ``components_``.\n\nExamples\n--------\n>>> from sklearn import datasets, svm\n>>> from sklearn.kernel_approximation import Nystroem\n>>> X, y = datasets.load_digits(n_class=9, return_X_y=True)\n>>> data = X / 16.\n>>> clf = svm.LinearSVC()\n>>> feature_map_nystroem = Nystroem(gamma=.2,\n... random_state=1,\n... n_components=300)\n>>> data_transformed = feature_map_nystroem.fit_transform(data)\n>>> clf.fit(data_transformed, y)\nLinearSVC()\n>>> clf.score(data_transformed, y)\n0.9987...\n\nReferences\n----------\n* Williams, C.K.I. and Seeger, M.\n \"Using the Nystroem method to speed up kernel machines\",\n Advances in neural information processing systems 2001\n\n* T. Yang, Y. Li, M. Mahdavi, R. Jin and Z. Zhou\n \"Nystroem Method vs Random Fourier Features: A Theoretical and Empirical\n Comparison\",\n Advances in Neural Information Processing Systems 2012\n\n\nSee Also\n--------\nRBFSampler : An approximation to the RBF kernel using random Fourier\n features.\n\nsklearn.metrics.pairwise.kernel_metrics : List of built-in kernels." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.kernel_ridge", - "imports": [ - "import numpy as np", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from base import MultiOutputMixin", - "from metrics.pairwise import pairwise_kernels", - "from linear_model._ridge import _solve_cholesky_kernel", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from utils.deprecation import deprecated" - ], - "classes": [ - { - "name": "KernelRidge", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "Union[ArrayLike, float]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Regularization strength; must be a positive float. Regularization improves the conditioning of the problem and reduces the variance of the estimates. Larger values specify stronger regularization. Alpha corresponds to ``1 / (2C)`` in other linear models such as :class:`~sklearn.linear_model.LogisticRegression` or :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are assumed to be specific to the targets. Hence they must correspond in number. See :ref:`ridge_regression` for formula." - }, - { - "name": "kernel", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "\"linear\"", - "limitation": null, - "ignored": false, - "docstring": "Kernel mapping used internally. This parameter is directly passed to :class:`~sklearn.metrics.pairwise.pairwise_kernel`. If `kernel` is a string, it must be one of the metrics in `pairwise.PAIRWISE_KERNEL_FUNCTIONS`. If `kernel` is \"precomputed\", X is assumed to be a kernel matrix. Alternatively, if `kernel` is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. 
The callable should take two rows from X as input and return the corresponding kernel value as a single number. This means that callables from :mod:`sklearn.metrics.pairwise` are not allowed, as they operate on matrices, not single samples. Use the string identifying the kernel instead." - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gamma parameter for the RBF, laplacian, polynomial, exponential chi2 and sigmoid kernels. Interpretation of the default value is left to the kernel; see the documentation for sklearn.metrics.pairwise. Ignored by other kernels." - }, - { - "name": "degree", - "type": "float", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel. Ignored by other kernels." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Zero coefficient for polynomial and sigmoid kernels. Ignored by other kernels." - }, - { - "name": "kernel_params", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional parameters (keyword arguments) for kernel function passed as callable object." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. If kernel == \"precomputed\" this is instead a precomputed kernel matrix, of shape (n_samples, n_samples)." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "sample_weight", - "type": "Union[ArrayLike, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample, ignored if None is passed." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Kernel Ridge regression model\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. If kernel == \"precomputed\" this is instead\n a precomputed kernel matrix, of shape (n_samples, n_samples).\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\nsample_weight : float or array-like of shape (n_samples,), default=None\n Individual weights for each sample, ignored if None is passed.\n\nReturns\n-------\nself : returns an instance of self." 
- }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples. If kernel == \"precomputed\" this is instead a precomputed kernel matrix, shape = [n_samples, n_samples_fitted], where n_samples_fitted is the number of samples used in the fitting for this estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the kernel ridge model\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples. If kernel == \"precomputed\" this is instead a\n precomputed kernel matrix, shape = [n_samples,\n n_samples_fitted], where n_samples_fitted is the number of\n samples used in the fitting for this estimator.\n\nReturns\n-------\nC : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Returns predicted values." - } - ], - "docstring": "Kernel ridge regression.\n\nKernel ridge regression (KRR) combines ridge regression (linear least\nsquares with l2-norm regularization) with the kernel trick. It thus\nlearns a linear function in the space induced by the respective kernel and\nthe data. For non-linear kernels, this corresponds to a non-linear\nfunction in the original space.\n\nThe form of the model learned by KRR is identical to support vector\nregression (SVR). However, different loss functions are used: KRR uses\nsquared error loss while support vector regression uses epsilon-insensitive\nloss, both combined with l2 regularization. In contrast to SVR, fitting a\nKRR model can be done in closed-form and is typically faster for\nmedium-sized datasets. 
On the other hand, the learned model is non-sparse\nand thus slower than SVR, which learns a sparse model for epsilon > 0, at\nprediction-time.\n\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape [n_samples, n_targets]).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float or array-like of shape (n_targets,), default=1.0\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n assumed to be specific to the targets. Hence they must correspond in\n number. See :ref:`ridge_regression` for formula.\n\nkernel : string or callable, default=\"linear\"\n Kernel mapping used internally. This parameter is directly passed to\n :class:`~sklearn.metrics.pairwise.pairwise_kernel`.\n If `kernel` is a string, it must be one of the metrics\n in `pairwise.PAIRWISE_KERNEL_FUNCTIONS`.\n If `kernel` is \"precomputed\", X is assumed to be a kernel matrix.\n Alternatively, if `kernel` is a callable function, it is called on\n each pair of instances (rows) and the resulting value recorded. The\n callable should take two rows from X as input and return the\n corresponding kernel value as a single number. This means that\n callables from :mod:`sklearn.metrics.pairwise` are not allowed, as\n they operate on matrices, not single samples. Use the string\n identifying the kernel instead.\n\ngamma : float, default=None\n Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\n and sigmoid kernels. 
Interpretation of the default value is left to\n the kernel; see the documentation for sklearn.metrics.pairwise.\n Ignored by other kernels.\n\ndegree : float, default=3\n Degree of the polynomial kernel. Ignored by other kernels.\n\ncoef0 : float, default=1\n Zero coefficient for polynomial and sigmoid kernels.\n Ignored by other kernels.\n\nkernel_params : mapping of string to any, default=None\n Additional parameters (keyword arguments) for kernel function passed\n as callable object.\n\nAttributes\n----------\ndual_coef_ : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Representation of weight vector(s) in kernel space\n\nX_fit_ : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data, which is also required for prediction. If\n kernel == \"precomputed\" this is instead the precomputed\n training matrix, of shape (n_samples, n_samples).\n\nReferences\n----------\n* Kevin P. Murphy\n \"Machine Learning: A Probabilistic Perspective\", The MIT Press\n chapter 14.4.3, pp. 
492-493\n\nSee Also\n--------\nsklearn.linear_model.Ridge : Linear ridge regression.\nsklearn.svm.SVR : Support Vector Regression implemented using libsvm.\n\nExamples\n--------\n>>> from sklearn.kernel_ridge import KernelRidge\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> clf = KernelRidge(alpha=1.0)\n>>> clf.fit(X, y)\nKernelRidge(alpha=1.0)" - } - ], - "functions": [] - }, - { - "name": "sklearn.multiclass", - "imports": [ - "import array", - "import numpy as np", - "import warnings", - "import scipy.sparse as sp", - "import itertools", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from base import clone", - "from base import is_classifier", - "from base import MultiOutputMixin", - "from base import MetaEstimatorMixin", - "from base import is_regressor", - "from base import _is_pairwise", - "from preprocessing import LabelBinarizer", - "from metrics.pairwise import euclidean_distances", - "from utils import check_random_state", - "from utils.deprecation import deprecated", - "from utils._tags import _safe_tags", - "from utils.validation import _num_samples", - "from utils.validation import check_is_fitted", - "from utils.validation import check_X_y", - "from utils.validation import check_array", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import _check_partial_fit_first_call", - "from utils.multiclass import check_classification_targets", - "from utils.multiclass import _ovr_decision_function", - "from utils.metaestimators import _safe_split", - "from utils.metaestimators import if_delegate_has_method", - "from utils.fixes import delayed", - "from exceptions import NotFittedError", - "from joblib import Parallel" - ], - "classes": [ - { - "name": "_ConstantPredictor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "OneVsRestClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator object implementing :term:`fit` and one of :term:`decision_function` or :term:`predict_proba`." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation: the `n_classes` one-vs-rest problems are computed in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionchanged:: v0.20 `n_jobs` default changed from 1 to None" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multi-class targets. An indicator matrix turns on multilabel classification." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Multi-class targets. An indicator matrix turns on multilabel\n classification.\n\nReturns\n-------\nself" - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multi-class targets. An indicator matrix turns on multilabel classification." - }, - { - "name": "classes", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Classes across all calls to partial_fit. Can be obtained via `np.unique(y_all)`, where y_all is the target vector of the entire dataset. This argument is only required in the first call of partial_fit and can be omitted in the subsequent calls." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Partially fit underlying estimators\n\nShould be used when memory is inefficient to train all data.\nChunks of data can be passed in several iteration.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Multi-class targets. 
An indicator matrix turns on multilabel\n classification.\n\nclasses : array, shape (n_classes, )\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is only required in the first call of partial_fit\n and can be omitted in the subsequent calls.\n\nReturns\n-------\nself" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Predict multi-class targets using underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\nReturns\n-------\ny : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Predicted multi-class targets." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Probability estimates.\n\nThe returned estimates for all classes are ordered by label of classes.\n\nNote that in the multilabel case, each sample can have any number of\nlabels. This returns the marginal probability that the given sample has\nthe label in question. For example, it is entirely consistent that two\nlabels both have a 90% probability of applying to a given sample.\n\nIn the single label multiclass case, the rows of the returned matrix\nsum to 1.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nT : (sparse) array-like of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in the model,\n where classes are ordered as they are in `self.classes_`." 
- }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the distance of each sample from the decision boundary for\neach class. This can only be used with estimators which implement the\ndecision_function method.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nT : array-like of shape (n_samples, n_classes) or (n_samples,) for binary classification.\n\n .. versionchanged:: 0.19\n output shape changed to ``(n_samples,)`` to conform to\n scikit-learn conventions for binary classification." - }, - { - "name": "multilabel_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Whether this is a multilabel classifier" - }, - { - "name": "n_classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "intercept_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Indicate if wrapped estimator is using a precomputed Gram matrix" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Indicate if wrapped estimator is using a precomputed Gram matrix" - }, - { - "name": "_first_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "n_features_in_", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "One-vs-the-rest (OvR) multiclass strategy.\n\nAlso known as one-vs-all, this strategy consists in fitting one classifier\nper class. For each classifier, the class is fitted against all the other\nclasses. In addition to its computational efficiency (only `n_classes`\nclassifiers are needed), one advantage of this approach is its\ninterpretability. Since each class is represented by one and one classifier\nonly, it is possible to gain knowledge about the class by inspecting its\ncorresponding classifier. This is the most commonly used strategy for\nmulticlass classification and is a fair default choice.\n\nOneVsRestClassifier can also be used for multilabel classification. To use\nthis feature, provide an indicator matrix for the target `y` when calling\n`.fit`. In other words, the target labels should be formatted as a 2D\nbinary (0/1) matrix, where [i, j] == 1 indicates the presence of label j\nin sample i. This estimator uses the binary relevance method to perform\nmultilabel classification, which involves training one binary classifier\nindependently for each label.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit` and one of\n :term:`decision_function` or :term:`predict_proba`.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation: the `n_classes`\n one-vs-rest problems are computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. 
versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\nAttributes\n----------\nestimators_ : list of `n_classes` estimators\n Estimators used for predictions.\n\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n Coefficient of the features in the decision function. This attribute\n exists only if the ``estimators_`` defines ``coef_``.\n\n .. deprecated:: 0.24\n This attribute is deprecated in 0.24 and will\n be removed in 1.1 (renaming of 0.26). If you use this attribute\n in :class:`~sklearn.feature_selection.RFE` or\n :class:`~sklearn.feature_selection.SelectFromModel`,\n you may pass a callable to the `importance_getter`\n parameter that extracts feature the importances\n from `estimators_`.\n\nintercept_ : ndarray of shape (1, 1) or (n_classes, 1)\n If ``y`` is binary, the shape is ``(1, 1)`` else ``(n_classes, 1)``\n This attribute exists only if the ``estimators_`` defines\n ``intercept_``.\n\n .. deprecated:: 0.24\n This attribute is deprecated in 0.24 and will\n be removed in 1.1 (renaming of 0.26). If you use this attribute\n in :class:`~sklearn.feature_selection.RFE` or\n :class:`~sklearn.feature_selection.SelectFromModel`,\n you may pass a callable to the `importance_getter`\n parameter that extracts feature the importances\n from `estimators_`.\n\nclasses_ : array, shape = [`n_classes`]\n Class labels.\n\nn_classes_ : int\n Number of classes.\n\nlabel_binarizer_ : LabelBinarizer object\n Object used to transform multiclass labels to binary labels and\n vice-versa.\n\nmultilabel_ : boolean\n Whether a OneVsRestClassifier is a multilabel classifier.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.multiclass import OneVsRestClassifier\n>>> from sklearn.svm import SVC\n>>> X = np.array([\n... [10, 10],\n... [8, 10],\n... [-5, 5.5],\n... [-5.4, 5.5],\n... [-20, -20],\n... [-15, -20]\n... 
])\n>>> y = np.array([0, 0, 1, 1, 2, 2])\n>>> clf = OneVsRestClassifier(SVC()).fit(X, y)\n>>> clf.predict([[-19, -20], [9, 9], [-5, 5]])\narray([2, 0, 1])\n\nSee Also\n--------\nsklearn.multioutput.MultiOutputClassifier : Alternate way of extending an\n estimator for multilabel classification.\nsklearn.preprocessing.MultiLabelBinarizer : Transform iterable of iterables\n to binary indicator matrix." - }, - { - "name": "OneVsOneClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator object implementing :term:`fit` and one of :term:`decision_function` or :term:`predict_proba`." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation: the `n_classes * ( n_classes - 1) / 2` OVO problems are computed in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multi-class targets." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : array-like of shape (n_samples,)\n Multi-class targets.\n\nReturns\n-------\nself" - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multi-class targets." - }, - { - "name": "classes", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Classes across all calls to partial_fit. Can be obtained via `np.unique(y_all)`, where y_all is the target vector of the entire dataset. This argument is only required in the first call of partial_fit and can be omitted in the subsequent calls." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Partially fit underlying estimators\n\nShould be used when memory is inefficient to train all data. 
Chunks\nof data can be passed in several iteration, where the first call\nshould have an array of all target variables.\n\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : array-like of shape (n_samples,)\n Multi-class targets.\n\nclasses : array, shape (n_classes, )\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is only required in the first call of partial_fit\n and can be omitted in the subsequent calls.\n\nReturns\n-------\nself" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Estimate the best class label for each sample in X.\n\nThis is implemented as ``argmax(decision_function(X), axis=1)`` which\nwill return the label of the class with most votes by estimators\npredicting the outcome of a decision for each possible class pair.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\nReturns\n-------\ny : numpy array of shape [n_samples]\n Predicted multi-class targets." 
- }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decision function for the OneVsOneClassifier.\n\nThe decision values for the samples are computed by adding the\nnormalized sum of pair-wise classification confidence levels to the\nvotes in order to disambiguate between the decision values when the\nvotes for all the classes are equal leading to a tie.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nY : array-like of shape (n_samples, n_classes) or (n_samples,) for binary classification.\n\n .. versionchanged:: 0.19\n output shape changed to ``(n_samples,)`` to conform to\n scikit-learn conventions for binary classification." - }, - { - "name": "n_classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Indicate if wrapped estimator is using a precomputed Gram matrix" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Indicate if wrapped estimator is using a precomputed Gram matrix" - } - ], - "docstring": "One-vs-one multiclass strategy\n\nThis strategy consists in fitting one classifier per class pair.\nAt prediction time, the class which received the most votes is selected.\nSince it requires to fit `n_classes * (n_classes - 1) / 2` classifiers,\nthis method is usually slower than one-vs-the-rest, due to its\nO(n_classes^2) complexity. However, this method may be advantageous for\nalgorithms such as kernel algorithms which don't scale well with\n`n_samples`. 
This is because each individual learning problem only involves\na small subset of the data whereas, with one-vs-the-rest, the complete\ndataset is used `n_classes` times.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit` and one of\n :term:`decision_function` or :term:`predict_proba`.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation: the `n_classes * (\n n_classes - 1) / 2` OVO problems are computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nestimators_ : list of ``n_classes * (n_classes - 1) / 2`` estimators\n Estimators used for predictions.\n\nclasses_ : numpy array of shape [n_classes]\n Array containing labels.\n\nn_classes_ : int\n Number of classes\n\npairwise_indices_ : list, length = ``len(estimators_)``, or ``None``\n Indices of samples used when training the estimators.\n ``None`` when ``estimator``'s `pairwise` tag is False.\n\n .. deprecated:: 0.24\n\n The _pairwise attribute is deprecated in 0.24. From 1.1\n (renaming of 0.25) and onward, `pairwise_indices_` will use the\n pairwise estimator tag instead.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.multiclass import OneVsOneClassifier\n>>> from sklearn.svm import LinearSVC\n>>> X, y = load_iris(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, test_size=0.33, shuffle=True, random_state=0)\n>>> clf = OneVsOneClassifier(\n... 
LinearSVC(random_state=0)).fit(X_train, y_train)\n>>> clf.predict(X_test[:10])\narray([2, 1, 0, 2, 0, 2, 0, 1, 1, 1])" - }, - { - "name": "OutputCodeClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator object implementing :term:`fit` and one of :term:`decision_function` or :term:`predict_proba`." - }, - { - "name": "code_size", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Percentage of the number of classes to be used to create the code book. A number between 0 and 1 will require fewer classifiers than one-vs-the-rest. A number greater than 1 will require more classifiers than one-vs-the-rest." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The generator used to initialize the codebook. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation: the multiclass problems are computed in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multi-class targets." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : numpy array of shape [n_samples]\n Multi-class targets.\n\nReturns\n-------\nself" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Predict multi-class targets using underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\nReturns\n-------\ny : numpy array of shape [n_samples]\n Predicted multi-class targets." - } - ], - "docstring": "(Error-Correcting) Output-Code multiclass strategy\n\nOutput-code based strategies consist in representing each class with a\nbinary code (an array of 0s and 1s). At fitting time, one binary\nclassifier per bit in the code book is fitted. At prediction time, the\nclassifiers are used to project new points in the class space and the class\nclosest to the points is chosen. The main advantage of these strategies is\nthat the number of classifiers used can be controlled by the user, either\nfor compressing the model (0 < code_size < 1) or for making the model more\nrobust to errors (code_size > 1). 
See the documentation for more details.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit` and one of\n :term:`decision_function` or :term:`predict_proba`.\n\ncode_size : float\n Percentage of the number of classes to be used to create the code book.\n A number between 0 and 1 will require fewer classifiers than\n one-vs-the-rest. A number greater than 1 will require more classifiers\n than one-vs-the-rest.\n\nrandom_state : int, RandomState instance, default=None\n The generator used to initialize the codebook.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation: the multiclass problems\n are computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nestimators_ : list of `int(n_classes * code_size)` estimators\n Estimators used for predictions.\n\nclasses_ : numpy array of shape [n_classes]\n Array containing labels.\n\ncode_book_ : numpy array of shape [n_classes, code_size]\n Binary array containing the code of each class.\n\nExamples\n--------\n>>> from sklearn.multiclass import OutputCodeClassifier\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=100, n_features=4,\n... n_informative=2, n_redundant=0,\n... random_state=0, shuffle=False)\n>>> clf = OutputCodeClassifier(\n... estimator=RandomForestClassifier(random_state=0),\n... random_state=0).fit(X, y)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])\n\nReferences\n----------\n\n.. [1] \"Solving multiclass learning problems via error-correcting output\n codes\",\n Dietterich T., Bakiri G.,\n Journal of Artificial Intelligence Research 2,\n 1995.\n\n.. 
[2] \"The error coding method and PICTs\",\n James G., Hastie T.,\n Journal of Computational and Graphical statistics 7,\n 1998.\n\n.. [3] \"The Elements of Statistical Learning\",\n Hastie T., Tibshirani R., Friedman J., page 606 (second-edition)\n 2008." - } - ], - "functions": [ - { - "name": "_fit_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit a single binary estimator." - }, - { - "name": "_partial_fit_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Partially fit a single binary estimator." - }, - { - "name": "_predict_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make predictions using a single binary estimator." - }, - { - "name": "_check_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure that an estimator implements the necessary methods." - }, - { - "name": "_fit_ovo_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit a single binary estimator (one-vs-one)." - }, - { - "name": "_partial_fit_ovo_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Partially fit a single binary estimator(one-vs-one)." 
- } - ] - }, - { - "name": "sklearn.multioutput", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "from joblib import Parallel", - "from abc import ABCMeta", - "from abc import abstractmethod", - "from base import BaseEstimator", - "from base import clone", - "from base import MetaEstimatorMixin", - "from base import RegressorMixin", - "from base import ClassifierMixin", - "from base import is_classifier", - "from model_selection import cross_val_predict", - "from utils import check_array", - "from utils import check_X_y", - "from utils import check_random_state", - "from utils.metaestimators import if_delegate_has_method", - "from utils.validation import check_is_fitted", - "from utils.validation import has_fit_parameter", - "from utils.validation import _check_fit_params", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import check_classification_targets", - "from utils.fixes import delayed" - ], - "classes": [ - { - "name": "_MultiOutputEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multi-output targets." - }, - { - "name": "classes", - "type": "List[NDArray]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Each array is unique classes for one output in str/int Can be obtained by via ``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where y is the target matrix of the entire dataset. 
This argument is required for the first call to partial_fit and can be omitted in the subsequent calls. Note that y doesn't need to contain all labels in `classes`." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Only supported if the underlying regressor supports sample weights." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Incrementally fit the model to data.\nFit a separate model for each output variable.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets.\n\nclasses : list of ndarray of shape (n_outputs,)\n Each array is unique classes for one output in str/int\n Can be obtained by via\n ``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where y is the\n target matrix of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\nReturns\n-------\nself : object" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multi-output targets. An indicator matrix turns on multilabel estimation." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Only supported if the underlying regressor supports sample weights." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``estimator.fit`` method of each step. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to data.\nFit a separate model for each output variable.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets. An indicator matrix turns on multilabel\n estimation.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``estimator.fit`` method of each step.\n\n .. versionadded:: 0.23\n\nReturns\n-------\nself : object" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict multi-output variable using a model\n trained for each target variable.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\nReturns\n-------\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets predicted across multiple predictors.\n Note: Separate models are generated for each predictor." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "MultiOutputRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator object implementing :term:`fit` and :term:`predict`." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported by the passed estimator) will be parallelized for each target. When individual estimators are fast to train or predict, using ``n_jobs > 1`` can result in slower performance due to the parallelism overhead. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all available processes / threads. See :term:`Glossary ` for more details. .. versionchanged:: 0.20 `n_jobs` default changed from 1 to None" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multi-output targets." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. 
Only supported if the underlying regressor supports sample weights." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Incrementally fit the model to data.\nFit a separate model for each output variable.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\nReturns\n-------\nself : object" - } - ], - "docstring": "Multi target regression\n\nThis strategy consists of fitting one regressor per target. This is a\nsimple strategy for extending regressors that do not natively support\nmulti-target regression.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit` and :term:`predict`.\n\nn_jobs : int or None, optional (default=None)\n The number of jobs to run in parallel.\n :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\n by the passed estimator) will be parallelized for each target.\n\n When individual estimators are fast to train or predict,\n using ``n_jobs > 1`` can result in slower performance due\n to the parallelism overhead.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all available processes / threads.\n See :term:`Glossary ` for more details.\n\n .. 
versionchanged:: 0.20\n `n_jobs` default changed from 1 to None\n\nAttributes\n----------\nestimators_ : list of ``n_output`` estimators\n Estimators used for predictions.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import load_linnerud\n>>> from sklearn.multioutput import MultiOutputRegressor\n>>> from sklearn.linear_model import Ridge\n>>> X, y = load_linnerud(return_X_y=True)\n>>> clf = MultiOutputRegressor(Ridge(random_state=123)).fit(X, y)\n>>> clf.predict(X[[0]])\narray([[176..., 35..., 57...]])" - }, - { - "name": "MultiOutputClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator object implementing :term:`fit`, :term:`score` and :term:`predict_proba`." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported by the passed estimator) will be parallelized for each target. When individual estimators are fast to train or predict, using ``n_jobs > 1`` can result in slower performance due to the parallelism overhead. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all available processes / threads. See :term:`Glossary ` for more details. .. versionchanged:: 0.20 `n_jobs` default changed from 1 to None" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." 
- }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Only supported if the underlying classifier supports sample weights." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``estimator.fit`` method of each step. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\nY : array-like of shape (n_samples, n_classes)\n The target values.\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying classifier supports sample\n weights.\n**fit_params : dict of string -> object\n Parameters passed to the ``estimator.fit`` method of each step.\n\n .. 
versionadded:: 0.23\n\nReturns\n-------\nself : object" - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Probability estimates.\nReturns prediction probabilities for each class of each output.\n\nThis method will raise a ``ValueError`` if any of the\nestimators do not have ``predict_proba``.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data\n\nReturns\n-------\np : array of shape (n_samples, n_classes), or a list of n_outputs such arrays if n_outputs > 1.\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n\n .. versionchanged:: 0.19\n This function now returns a list of arrays where the length of\n the list is ``n_outputs``, and each array is (``n_samples``,\n ``n_classes``) for that particular output." 
- }, - { - "name": "_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples" - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True values for X" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the mean accuracy on the given test data and labels.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples\n\ny : array-like of shape (n_samples, n_outputs)\n True values for X\n\nReturns\n-------\nscores : float\n accuracy_score of self.predict(X) versus y" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Multi target classification\n\nThis strategy consists of fitting one classifier per target. This is a\nsimple strategy for extending classifiers that do not natively support\nmulti-target classification\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit`, :term:`score` and\n :term:`predict_proba`.\n\nn_jobs : int or None, optional (default=None)\n The number of jobs to run in parallel.\n :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\n by the passed estimator) will be parallelized for each target.\n\n When individual estimators are fast to train or predict,\n using ``n_jobs > 1`` can result in slower performance due\n to the parallelism overhead.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all available processes / threads.\n See :term:`Glossary ` for more details.\n\n .. 
versionchanged:: 0.20\n `n_jobs` default changed from 1 to None\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n Class labels.\n\nestimators_ : list of ``n_output`` estimators\n Estimators used for predictions.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_multilabel_classification\n>>> from sklearn.multioutput import MultiOutputClassifier\n>>> from sklearn.neighbors import KNeighborsClassifier\n\n>>> X, y = make_multilabel_classification(n_classes=3, random_state=0)\n>>> clf = MultiOutputClassifier(KNeighborsClassifier()).fit(X, y)\n>>> clf.predict(X[-2:])\narray([[1, 1, 0], [1, 1, 1]])" - }, - { - "name": "_BaseChain", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the `fit` method of each step. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\nY : array-like of shape (n_samples, n_classes)\n The target values.\n**fit_params : dict of string -> object\n Parameters passed to the `fit` method of each step.\n\n .. 
versionadded:: 0.23\n\nReturns\n-------\nself : object" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict on the data matrix X using the ClassifierChain model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\nY_pred : array-like of shape (n_samples, n_classes)\n The predicted values." - } - ], - "docstring": null - }, - { - "name": "ClassifierChain", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base estimator from which the classifier chain is built." - }, - { - "name": "order", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If None, the order will be determined by the order of columns in the label matrix Y.:: order = [0, 1, 2, ..., Y.shape[1] - 1] The order of the chain can be explicitly set by providing a list of integers. For example, for a chain of length 5.:: order = [1, 3, 2, 4, 0] means that the first model in the chain will make predictions for column 1 in the Y matrix, the second model will make predictions for column 3, etc. If order is 'random' a random ordering will be used." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines whether to use cross validated predictions or true labels for the results of previous estimators in the chain. 
Possible inputs for cv are: - None, to use true labels when fitting, - integer, to specify the number of folds in a (Stratified)KFold, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If ``order='random'``, determines random number generation for the chain order. In addition, it controls the random seed given at each `base_estimator` at each chaining iteration. Thus, it is only used when `base_estimator` exposes a `random_state`. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\nY : array-like of shape (n_samples, n_classes)\n The target values.\n\nReturns\n-------\nself : object" - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict probability estimates.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n\nReturns\n-------\nY_prob : array-like of shape (n_samples, n_classes)" - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate the decision_function of the models in the chain.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nY_decision : array-like of shape (n_samples, n_classes)\n Returns the decision function of the sample for each model\n in the chain." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A multi-label model that arranges binary classifiers into a chain.\n\nEach model makes a prediction in the order specified by the chain using\nall of the available features provided to the model plus the predictions\nof models that are earlier in the chain.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.19\n\nParameters\n----------\nbase_estimator : estimator\n The base estimator from which the classifier chain is built.\n\norder : array-like of shape (n_outputs,) or 'random', default=None\n If None, the order will be determined by the order of columns in\n the label matrix Y.::\n\n order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n The order of the chain can be explicitly set by providing a list of\n integers. For example, for a chain of length 5.::\n\n order = [1, 3, 2, 4, 0]\n\n means that the first model in the chain will make predictions for\n column 1 in the Y matrix, the second model will make predictions\n for column 3, etc.\n\n If order is 'random' a random ordering will be used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines whether to use cross validated predictions or true\n labels for the results of previous estimators in the chain.\n Possible inputs for cv are:\n\n - None, to use true labels when fitting,\n - integer, to specify the number of folds in a (Stratified)KFold,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\nrandom_state : int, RandomState instance or None, optional (default=None)\n If ``order='random'``, determines random number generation for the\n chain order.\n In addition, it controls the random seed given at each `base_estimator`\n at each chaining iteration. 
Thus, it is only used when `base_estimator`\n exposes a `random_state`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nclasses_ : list\n A list of arrays of length ``len(estimators_)`` containing the\n class labels for each estimator in the chain.\n\nestimators_ : list\n A list of clones of base_estimator.\n\norder_ : list\n The order of labels in the classifier chain.\n\nExamples\n--------\n>>> from sklearn.datasets import make_multilabel_classification\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.multioutput import ClassifierChain\n>>> X, Y = make_multilabel_classification(\n... n_samples=12, n_classes=3, random_state=0\n... )\n>>> X_train, X_test, Y_train, Y_test = train_test_split(\n... X, Y, random_state=0\n... )\n>>> base_lr = LogisticRegression(solver='lbfgs', random_state=0)\n>>> chain = ClassifierChain(base_lr, order='random', random_state=0)\n>>> chain.fit(X_train, Y_train).predict(X_test)\narray([[1., 1., 0.],\n [1., 0., 0.],\n [0., 1., 0.]])\n>>> chain.predict_proba(X_test)\narray([[0.8387..., 0.9431..., 0.4576...],\n [0.8878..., 0.3684..., 0.2640...],\n [0.0321..., 0.9935..., 0.0625...]])\n\nSee Also\n--------\nRegressorChain : Equivalent for regression.\nMultioutputClassifier : Classifies each output independently rather than\n chaining.\n\nReferences\n----------\nJesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, \"Classifier\nChains for Multi-label Classification\", 2009." - }, - { - "name": "RegressorChain", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base estimator from which the classifier chain is built." 
- }, - { - "name": "order", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If None, the order will be determined by the order of columns in the label matrix Y.:: order = [0, 1, 2, ..., Y.shape[1] - 1] The order of the chain can be explicitly set by providing a list of integers. For example, for a chain of length 5.:: order = [1, 3, 2, 4, 0] means that the first model in the chain will make predictions for column 1 in the Y matrix, the second model will make predictions for column 3, etc. If order is 'random' a random ordering will be used." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines whether to use cross validated predictions or true labels for the results of previous estimators in the chain. Possible inputs for cv are: - None, to use true labels when fitting, - integer, to specify the number of folds in a (Stratified)KFold, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If ``order='random'``, determines random number generation for the chain order. In addition, it controls the random seed given at each `base_estimator` at each chaining iteration. Thus, it is only used when `base_estimator` exposes a `random_state`. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." 
- }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the `fit` method at each step of the regressor chain. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\nY : array-like of shape (n_samples, n_classes)\n The target values.\n\n**fit_params : dict of string -> object\n Parameters passed to the `fit` method at each step\n of the regressor chain.\n\n .. versionadded:: 0.23\n\nReturns\n-------\nself : object" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A multi-label model that arranges regressions into a chain.\n\nEach model makes a prediction in the order specified by the chain using\nall of the available features provided to the model plus the predictions\nof models that are earlier in the chain.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nbase_estimator : estimator\n The base estimator from which the classifier chain is built.\n\norder : array-like of shape (n_outputs,) or 'random', default=None\n If None, the order will be determined by the order of columns in\n the label matrix Y.::\n\n order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n The order of the chain can be explicitly set by providing a list of\n integers. 
For example, for a chain of length 5.::\n\n order = [1, 3, 2, 4, 0]\n\n means that the first model in the chain will make predictions for\n column 1 in the Y matrix, the second model will make predictions\n for column 3, etc.\n\n If order is 'random' a random ordering will be used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines whether to use cross validated predictions or true\n labels for the results of previous estimators in the chain.\n Possible inputs for cv are:\n\n - None, to use true labels when fitting,\n - integer, to specify the number of folds in a (Stratified)KFold,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\nrandom_state : int, RandomState instance or None, optional (default=None)\n If ``order='random'``, determines random number generation for the\n chain order.\n In addition, it controls the random seed given at each `base_estimator`\n at each chaining iteration. Thus, it is only used when `base_estimator`\n exposes a `random_state`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nestimators_ : list\n A list of clones of base_estimator.\n\norder_ : list\n The order of labels in the classifier chain.\n\nExamples\n--------\n>>> from sklearn.multioutput import RegressorChain\n>>> from sklearn.linear_model import LogisticRegression\n>>> logreg = LogisticRegression(solver='lbfgs',multi_class='multinomial')\n>>> X, Y = [[1, 0], [0, 1], [1, 1]], [[0, 2], [1, 1], [2, 0]]\n>>> chain = RegressorChain(base_estimator=logreg, order=[0, 1]).fit(X, Y)\n>>> chain.predict(X)\narray([[0., 2.],\n [1., 1.],\n [2., 0.]])\n\nSee Also\n--------\nClassifierChain : Equivalent for classification.\nMultioutputRegressor : Learns each output independently rather than\n chaining." 
- } - ], - "functions": [ - { - "name": "_fit_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_partial_fit_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.naive_bayes", - "imports": [ - "import warnings", - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numpy as np", - "from scipy.special import logsumexp", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from preprocessing import binarize", - "from preprocessing import LabelBinarizer", - "from preprocessing import label_binarize", - "from utils import check_X_y", - "from utils import check_array", - "from utils import deprecated", - "from utils.extmath import safe_sparse_dot", - "from utils.multiclass import _check_partial_fit_first_call", - "from utils.validation import check_is_fitted", - "from utils.validation import check_non_negative", - "from utils.validation import column_or_1d", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "_BaseNB", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_joint_log_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the unnormalized posterior log probability of X\n\nI.e. ``log P(c) + log P(x|c)`` for all rows x of X, as an array-like of\nshape (n_classes, n_samples).\n\nInput is passed to _joint_log_likelihood as-is by predict,\npredict_proba and predict_log_proba." 
- }, - { - "name": "_check_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "To be overridden in subclasses with the actual checks." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on an array of test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n Predicted target values for X" - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return log-probability estimates for the test vector X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : array-like of shape (n_samples, n_classes)\n Returns the log-probability of the samples for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return probability estimates for the test vector X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : array-like of shape (n_samples, n_classes)\n Returns the probability of the samples for each class in\n the model. 
The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`." - } - ], - "docstring": "Abstract base class for naive Bayes estimators" - }, - { - "name": "GaussianNB", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "priors", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prior probabilities of the classes. If specified the priors are not adjusted according to the data." - }, - { - "name": "var_smoothing", - "type": "float", - "hasDefault": true, - "default": "1e-9", - "limitation": null, - "ignored": false, - "docstring": "Portion of the largest variance of all features that is added to variances for calculation stability. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted). .. versionadded:: 0.17 Gaussian Naive Bayes supports fitting with *sample_weight*." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Gaussian Naive Bayes according to X, y\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n .. versionadded:: 0.17\n Gaussian Naive Bayes supports fitting with *sample_weight*.\n\nReturns\n-------\nself : object" - }, - { - "name": "_check_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_update_mean_variance", - "decorators": [], - "parameters": [ - { - "name": "n_past", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples represented in old mean and variance. If sample weights were given, this should contain the sum of sample weights represented in old mean and variance." - }, - { - "name": "mu", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Means for Gaussians in original set." - }, - { - "name": "var", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Variances for Gaussians in original set." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted)." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute online update of Gaussian mean and variance.\n\nGiven starting sample count, mean, and variance, a new set of\npoints X, and optionally sample weights, return the updated mean and\nvariance. (NB - each dimension (column) in X is treated as independent\n-- you get variance, not covariance).\n\nCan take scalar mean and variance, or vector mean and variance to\nsimultaneously update a number of independent Gaussians.\n\nSee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\nhttp://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nParameters\n----------\nn_past : int\n Number of samples represented in old mean and variance. If sample\n weights were given, this should contain the sum of sample\n weights represented in old mean and variance.\n\nmu : array-like of shape (number of Gaussians,)\n Means for Gaussians in original set.\n\nvar : array-like of shape (number of Gaussians,)\n Variances for Gaussians in original set.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\ntotal_mu : array-like of shape (number of Gaussians,)\n Updated mean for each Gaussian over the combined set.\n\ntotal_var : array-like of shape (number of Gaussians,)\n Updated variance for each Gaussian over the combined set." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." 
- }, - { - "name": "classes", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of all the classes that can possibly appear in the y vector. Must be provided at the first call to partial_fit, can be omitted in subsequent calls." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted). .. versionadded:: 0.17" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance and numerical stability overhead,\nhence it is better to call partial_fit on chunks of data that are\nas large as possible (as long as fitting in the memory budget) to\nhide the overhead.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nclasses : array-like of shape (n_classes,), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n .. 
versionadded:: 0.17\n\nReturns\n-------\nself : object" - }, - { - "name": "_partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "classes", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of all the classes that can possibly appear in the y vector. Must be provided at the first call to partial_fit, can be omitted in subsequent calls." - }, - { - "name": "_refit", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, act as though this were the first time we called _partial_fit (ie, throw away any past fitting and start over)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted)." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Actual implementation of Gaussian NB fitting.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nclasses : array-like of shape (n_classes,), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\n_refit : bool, default=False\n If true, act as though this were the first time we called\n _partial_fit (ie, throw away any past fitting and start over).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : object" - }, - { - "name": "_joint_log_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Gaussian Naive Bayes (GaussianNB)\n\nCan perform online updates to model parameters via :meth:`partial_fit`.\nFor details on algorithm used to update feature means and variance online,\nsee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npriors : array-like of shape (n_classes,)\n Prior probabilities of the classes. If specified the priors are not\n adjusted according to the data.\n\nvar_smoothing : float, default=1e-9\n Portion of the largest variance of all features that is added to\n variances for calculation stability.\n\n .. 
versionadded:: 0.20\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n number of training samples observed in each class.\n\nclass_prior_ : ndarray of shape (n_classes,)\n probability of each class.\n\nclasses_ : ndarray of shape (n_classes,)\n class labels known to the classifier\n\nepsilon_ : float\n absolute additive value to variances\n\nsigma_ : ndarray of shape (n_classes, n_features)\n variance of each feature per class\n\ntheta_ : ndarray of shape (n_classes, n_features)\n mean of each feature per class\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> Y = np.array([1, 1, 1, 2, 2, 2])\n>>> from sklearn.naive_bayes import GaussianNB\n>>> clf = GaussianNB()\n>>> clf.fit(X, Y)\nGaussianNB()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n>>> clf_pf = GaussianNB()\n>>> clf_pf.partial_fit(X, Y, np.unique(Y))\nGaussianNB()\n>>> print(clf_pf.predict([[-0.8, -1]]))\n[1]" - }, - { - "name": "_BaseDiscreteNB", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_check_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_X_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_update_class_log_prior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_alpha", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - 
"docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "classes", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of all the classes that can possibly appear in the y vector. Must be provided at the first call to partial_fit, can be omitted in subsequent calls." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted)." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance overhead hence it is better to call\npartial_fit on chunks of data that are as large as possible\n(as long as fitting in the memory budget) to hide the overhead.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nclasses : array-like of shape (n_classes), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. 
for unweighted).\n\nReturns\n-------\nself : object" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted)." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Naive Bayes classifier according to X, y\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. 
for unweighted).\n\nReturns\n-------\nself : object" - }, - { - "name": "_init_counters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "intercept_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Abstract base class for naive Bayes on discrete/categorical data\n\nAny estimator based on this class should provide:\n\n__init__\n_joint_log_likelihood(X) as per _BaseNB" - }, - { - "name": "MultinomialNB", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing)." - }, - { - "name": "fit_prior", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to learn class prior probabilities or not. If false, a uniform prior will be used." - }, - { - "name": "class_prior", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prior probabilities of the classes. If specified the priors are not adjusted according to the data." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_count", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Count and smooth feature occurrences." - }, - { - "name": "_update_feature_log_prob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply smoothing to raw counts and recompute log probabilities" - }, - { - "name": "_joint_log_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate the posterior log probability of the samples X" - } - ], - "docstring": "Naive Bayes classifier for multinomial models\n\nThe multinomial Naive Bayes classifier is suitable for classification with\ndiscrete features (e.g., word counts for text classification). The\nmultinomial distribution normally requires integer feature counts. However,\nin practice, fractional counts such as tf-idf may also work.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter\n (0 for no smoothing).\n\nfit_prior : bool, default=True\n Whether to learn class prior probabilities or not.\n If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. If specified the priors are not\n adjusted according to the data.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n Number of samples encountered for each class during fitting. 
This\n value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes, )\n Smoothed empirical log probability for each class.\n\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\ncoef_ : ndarray of shape (n_classes, n_features)\n Mirrors ``feature_log_prob_`` for interpreting `MultinomialNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``coef_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n Number of samples encountered for each (class, feature)\n during fitting. This value is weighted by the sample weight when\n provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n Empirical log probability of features\n given a class, ``P(x_i|y)``.\n\nintercept_ : ndarray of shape (n_classes,)\n Mirrors ``class_log_prior_`` for interpreting `MultinomialNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``intercept_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\nn_features_ : int\n Number of features of each sample.\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import MultinomialNB\n>>> clf = MultinomialNB()\n>>> clf.fit(X, y)\nMultinomialNB()\n>>> print(clf.predict(X[2:3]))\n[3]\n\nNotes\n-----\nFor the rationale behind the names `coef_` and `intercept_`, i.e.\nnaive Bayes as a linear classifier, see J. Rennie et al. (2003),\nTackling the poor assumptions of naive Bayes text classifiers, ICML.\n\nReferences\n----------\nC.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\nInformation Retrieval. Cambridge University Press, pp. 
234-265.\nhttps://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html" - }, - { - "name": "ComplementNB", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing)." - }, - { - "name": "fit_prior", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Only used in edge case with a single class in the training set." - }, - { - "name": "class_prior", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prior probabilities of the classes. Not used." - }, - { - "name": "norm", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not a second normalization of the weights is performed. The default behavior mirrors the implementations found in Mahout and Weka, which do not follow the full algorithm described in Table 9 of the paper." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_count", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Count feature occurrences." - }, - { - "name": "_update_feature_log_prob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply smoothing to raw counts and compute the weights." 
- }, - { - "name": "_joint_log_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate the class scores for the samples in X." - } - ], - "docstring": "The Complement Naive Bayes classifier described in Rennie et al. (2003).\n\nThe Complement Naive Bayes classifier was designed to correct the \"severe\nassumptions\" made by the standard Multinomial Naive Bayes classifier. It is\nparticularly suited for imbalanced data sets.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nalpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).\n\nfit_prior : bool, default=True\n Only used in edge case with a single class in the training set.\n\nclass_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. Not used.\n\nnorm : bool, default=False\n Whether or not a second normalization of the weights is performed. The\n default behavior mirrors the implementations found in Mahout and Weka,\n which do not follow the full algorithm described in Table 9 of the\n paper.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n Number of samples encountered for each class during fitting. This\n value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes,)\n Smoothed empirical log probability for each class. Only used in edge\n case with a single class in the training set.\n\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\ncoef_ : ndarray of shape (n_classes, n_features)\n Mirrors ``feature_log_prob_`` for interpreting `ComplementNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``coef_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\nfeature_all_ : ndarray of shape (n_features,)\n Number of samples encountered for each feature during fitting. 
This\n value is weighted by the sample weight when provided.\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n Number of samples encountered for each (class, feature) during fitting.\n This value is weighted by the sample weight when provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n Empirical weights for class complements.\n\nintercept_ : ndarray of shape (n_classes,)\n Mirrors ``class_log_prior_`` for interpreting `ComplementNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``coef_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\nn_features_ : int\n Number of features of each sample.\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import ComplementNB\n>>> clf = ComplementNB()\n>>> clf.fit(X, y)\nComplementNB()\n>>> print(clf.predict(X[2:3]))\n[3]\n\nReferences\n----------\nRennie, J. D., Shih, L., Teevan, J., & Karger, D. R. (2003).\nTackling the poor assumptions of naive bayes text classifiers. In ICML\n(Vol. 3, pp. 616-623).\nhttps://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf" - }, - { - "name": "BernoulliNB", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing)." - }, - { - "name": "binarize", - "type": "Optional[float]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors." 
- }, - { - "name": "fit_prior", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to learn class prior probabilities or not. If false, a uniform prior will be used." - }, - { - "name": "class_prior", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prior probabilities of the classes. If specified the priors are not adjusted according to the data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_X_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_count", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Count and smooth feature occurrences." - }, - { - "name": "_update_feature_log_prob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply smoothing to raw counts and recompute log probabilities" - }, - { - "name": "_joint_log_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate the posterior log probability of the samples X" - } - ], - "docstring": "Naive Bayes classifier for multivariate Bernoulli models.\n\nLike MultinomialNB, this classifier is suitable for discrete data. 
The\ndifference is that while MultinomialNB works with occurrence counts,\nBernoulliNB is designed for binary/boolean features.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter\n (0 for no smoothing).\n\nbinarize : float or None, default=0.0\n Threshold for binarizing (mapping to booleans) of sample features.\n If None, input is presumed to already consist of binary vectors.\n\nfit_prior : bool, default=True\n Whether to learn class prior probabilities or not.\n If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. If specified the priors are not\n adjusted according to the data.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes)\n Number of samples encountered for each class during fitting. This\n value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes)\n Log probability of each class (smoothed).\n\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\ncoef_ : ndarray of shape (n_classes, n_features)\n Mirrors ``feature_log_prob_`` for interpreting `BernoulliNB`\n as a linear model.\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n Number of samples encountered for each (class, feature)\n during fitting. 
This value is weighted by the sample weight when\n provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n Empirical log probability of features given a class, P(x_i|y).\n\nintercept_ : ndarray of shape (n_classes,)\n Mirrors ``class_log_prior_`` for interpreting `BernoulliNB`\n as a linear model.\n\nn_features_ : int\n Number of features of each sample.\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> Y = np.array([1, 2, 3, 4, 4, 5])\n>>> from sklearn.naive_bayes import BernoulliNB\n>>> clf = BernoulliNB()\n>>> clf.fit(X, Y)\nBernoulliNB()\n>>> print(clf.predict(X[2:3]))\n[3]\n\nReferences\n----------\nC.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\nInformation Retrieval. Cambridge University Press, pp. 234-265.\nhttps://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html\n\nA. McCallum and K. Nigam (1998). A comparison of event models for naive\nBayes text classification. Proc. AAAI/ICML-98 Workshop on Learning for\nText Categorization, pp. 41-48.\n\nV. Metsis, I. Androutsopoulos and G. Paliouras (2006). Spam filtering with\nnaive Bayes -- Which naive Bayes? 3rd Conf. on Email and Anti-Spam (CEAS)." - }, - { - "name": "CategoricalNB", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing)." - }, - { - "name": "fit_prior", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to learn class prior probabilities or not. If false, a uniform prior will be used." 
- }, - { - "name": "class_prior", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prior probabilities of the classes. If specified the priors are not adjusted according to the data." - }, - { - "name": "min_categories", - "type": "Union[ArrayLike, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Minimum number of categories per feature. - integer: Sets the minimum number of categories per feature to `n_categories` for each features. - array-like: shape (n_features,) where `n_categories[i]` holds the minimum number of categories for the ith column of the input. - None (default): Determines the number of categories automatically from the training data. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features. Here, each feature of X is assumed to be from a different categorical distribution. It is further assumed that all categories of each feature are represented by the numbers 0, ..., n - 1, where n refers to the total number of categories for the given feature. This can, for instance, be achieved with the help of OrdinalEncoder." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted)." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Naive Bayes classifier according to X, y\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features. Here, each feature of X is\n assumed to be from a different categorical distribution.\n It is further assumed that all categories of each feature are\n represented by the numbers 0, ..., n - 1, where n refers to the\n total number of categories for the given feature. This can, for\n instance, be achieved with the help of OrdinalEncoder.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : object" - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features. Here, each feature of X is assumed to be from a different categorical distribution. It is further assumed that all categories of each feature are represented by the numbers 0, ..., n - 1, where n refers to the total number of categories for the given feature. This can, for instance, be achieved with the help of OrdinalEncoder." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "classes", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of all the classes that can possibly appear in the y vector. Must be provided at the first call to partial_fit, can be omitted in subsequent calls." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted)." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance overhead hence it is better to call\npartial_fit on chunks of data that are as large as possible\n(as long as fitting in the memory budget) to hide the overhead.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features. Here, each feature of X is\n assumed to be from a different categorical distribution.\n It is further assumed that all categories of each feature are\n represented by the numbers 0, ..., n - 1, where n refers to the\n total number of categories for the given feature. This can, for\n instance, be achieved with the help of OrdinalEncoder.\n\ny : array-like of shape (n_samples)\n Target values.\n\nclasses : array-like of shape (n_classes), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\nsample_weight : array-like of shape (n_samples), default=None\n Weights applied to individual samples (1. 
for unweighted).\n\nReturns\n-------\nself : object" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_X_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_init_counters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_n_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_count", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_update_feature_log_prob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_joint_log_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Naive Bayes classifier for categorical features\n\nThe categorical Naive Bayes classifier is suitable for classification with\ndiscrete features that are categorically distributed. The categories of\neach feature are drawn from a categorical distribution.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter\n (0 for no smoothing).\n\nfit_prior : bool, default=True\n Whether to learn class prior probabilities or not.\n If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. 
If specified the priors are not\n adjusted according to the data.\n\nmin_categories : int or array-like of shape (n_features,), default=None\n Minimum number of categories per feature.\n\n - integer: Sets the minimum number of categories per feature to\n `n_categories` for each features.\n - array-like: shape (n_features,) where `n_categories[i]` holds the\n minimum number of categories for the ith column of the input.\n - None (default): Determines the number of categories automatically\n from the training data.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncategory_count_ : list of arrays of shape (n_features,)\n Holds arrays of shape (n_classes, n_categories of respective feature)\n for each feature. Each array provides the number of samples\n encountered for each class and category of the specific feature.\n\nclass_count_ : ndarray of shape (n_classes,)\n Number of samples encountered for each class during fitting. This\n value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes,)\n Smoothed empirical log probability for each class.\n\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\nfeature_log_prob_ : list of arrays of shape (n_features,)\n Holds arrays of shape (n_classes, n_categories of respective feature)\n for each feature. Each array provides the empirical log probability\n of categories given the respective feature and class, ``P(x_i|y)``.\n\nn_features_ : int\n Number of features of each sample.\n\nn_categories_ : ndarray of shape (n_features,), dtype=np.int64\n Number of categories for each feature. This value is\n inferred from the data or set by the minimum number of categories.\n\n .. 
versionadded:: 0.24\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import CategoricalNB\n>>> clf = CategoricalNB()\n>>> clf.fit(X, y)\nCategoricalNB()\n>>> print(clf.predict(X[2:3]))\n[3]" - } - ], - "functions": [] - }, - { - "name": "sklearn.pipeline", - "imports": [ - "from collections import defaultdict", - "from itertools import islice", - "import numpy as np", - "from scipy import sparse", - "from joblib import Parallel", - "from base import clone", - "from base import TransformerMixin", - "from utils._estimator_html_repr import _VisualBlock", - "from utils.metaestimators import if_delegate_has_method", - "from utils import Bunch", - "from utils import _print_elapsed_time", - "from utils.deprecation import deprecated", - "from utils._tags import _safe_tags", - "from utils.validation import check_memory", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from utils.metaestimators import _BaseComposition" - ], - "classes": [ - { - "name": "Pipeline", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "steps", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of (name, transform) tuples (implementing fit/transform) that are chained, in the order in which they are chained, with the last object an estimator." - }, - { - "name": "memory", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to cache the fitted transformers of the pipeline. By default, no caching is performed. If a string is given, it is the path to the caching directory. Enabling caching triggers a clone of the transformers before fitting. 
Therefore, the transformer instance given to the pipeline cannot be inspected directly. Use the attribute ``named_steps`` or ``steps`` to inspect estimators within the pipeline. Caching the transformers is advantageous when fitting is time consuming." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the time elapsed while fitting each step will be printed as it is completed." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Get parameters for this estimator.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `steps` of the `Pipeline`.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : mapping of string to any\n Parameter names mapped to their values." - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. 
Note that\nyou can directly set the parameters of the estimators contained in\n`steps`.\n\nReturns\n-------\nself" - }, - { - "name": "_validate_steps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate (idx, (name, trans)) tuples from self.steps\n\nWhen filter_passthrough is True, 'passthrough' and None transformers\nare filtered out." - }, - { - "name": "__len__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the length of the Pipeline" - }, - { - "name": "__getitem__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns a sub-pipeline or a single esimtator in the pipeline\n\nIndexing with an integer will return an estimator; using a slice\nreturns another Pipeline instance which copies a slice of this\nPipeline. This copy is shallow: modifying (or fitting) estimators in\nthe sub-pipeline will affect the larger pipeline and vice-versa.\nHowever, replacing a value in `step` will not affect a copy." 
- }, - { - "name": "_estimator_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "named_steps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_final_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_log_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. Must fulfill input requirements of first step of the pipeline." - }, - { - "name": "y", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training targets. Must fulfill label requirements for all steps of the pipeline." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``fit`` method of each step, where each parameter name is prefixed such that parameter ``p`` for step ``s`` has key ``s__p``." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model\n\nFit all the transforms one after the other and transform the\ndata, then fit the transformed data using the final estimator.\n\nParameters\n----------\nX : iterable\n Training data. 
Must fulfill input requirements of first step of the\n pipeline.\n\ny : iterable, default=None\n Training targets. Must fulfill label requirements for all steps of\n the pipeline.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\nReturns\n-------\nself : Pipeline\n This estimator" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. Must fulfill input requirements of first step of the pipeline." - }, - { - "name": "y", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training targets. Must fulfill label requirements for all steps of the pipeline." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``fit`` method of each step, where each parameter name is prefixed such that parameter ``p`` for step ``s`` has key ``s__p``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model and transform with the final estimator\n\nFits all the transforms one after the other and transforms the\ndata, then uses fit_transform on transformed data with the final\nestimator.\n\nParameters\n----------\nX : iterable\n Training data. Must fulfill input requirements of first step of the\n pipeline.\n\ny : iterable, default=None\n Training targets. 
Must fulfill label requirements for all steps of\n the pipeline.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\nReturns\n-------\nXt : array-like of shape (n_samples, n_transformed_features)\n Transformed samples" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline." - }, - { - "name": "**predict_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to the ``predict`` called at the end of all transformations in the pipeline. Note that while this may be used to return uncertainties from some models with return_std or return_cov, uncertainties that are generated by the transformations in the pipeline are not propagated to the final estimator. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply transforms to the data, and predict with the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\n**predict_params : dict of string -> object\n Parameters to the ``predict`` called at the end of all\n transformations in the pipeline. Note that while this may be\n used to return uncertainties from some models with return_std\n or return_cov, uncertainties that are generated by the\n transformations in the pipeline are not propagated to the\n final estimator.\n\n .. 
versionadded:: 0.20\n\nReturns\n-------\ny_pred : array-like" - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. Must fulfill input requirements of first step of the pipeline." - }, - { - "name": "y", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training targets. Must fulfill label requirements for all steps of the pipeline." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``fit`` method of each step, where each parameter name is prefixed such that parameter ``p`` for step ``s`` has key ``s__p``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Applies fit_predict of last step in pipeline after transforms.\n\nApplies fit_transforms of a pipeline to the data, followed by the\nfit_predict method of the final estimator in the pipeline. Valid\nonly if the final estimator implements fit_predict.\n\nParameters\n----------\nX : iterable\n Training data. Must fulfill input requirements of first step of\n the pipeline.\n\ny : iterable, default=None\n Training targets. Must fulfill label requirements for all steps\n of the pipeline.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\nReturns\n-------\ny_pred : array-like" - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply transforms, and predict_proba of the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\ny_proba : array-like of shape (n_samples, n_classes)" - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply transforms, and decision_function of the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\ny_score : array-like of shape (n_samples, n_classes)" - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply transforms, and score_samples of the final estimator.\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\ny_score : ndarray of shape (n_samples,)" - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply transforms, and predict_log_proba of the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\ny_score : array-like of shape (n_samples, n_classes)" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to transform. Must fulfill input requirements of first step of the pipeline." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply transforms, and transform with the final estimator\n\nThis also works where final estimator is ``None``: all prior\ntransformations are applied.\n\nParameters\n----------\nX : iterable\n Data to transform. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\nXt : array-like of shape (n_samples, n_transformed_features)" - }, - { - "name": "_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "Xt", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data samples, where ``n_samples`` is the number of samples and ``n_features`` is the number of features. Must fulfill input requirements of last step of pipeline's ``inverse_transform`` method." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply inverse transformations in reverse order\n\nAll estimators in the pipeline must support ``inverse_transform``.\n\nParameters\n----------\nXt : array-like of shape (n_samples, n_transformed_features)\n Data samples, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features. Must fulfill\n input requirements of last step of pipeline's\n ``inverse_transform`` method.\n\nReturns\n-------\nXt : array-like of shape (n_samples, n_features)" - }, - { - "name": "_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline." - }, - { - "name": "y", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets used for scoring. Must fulfill label requirements for all steps of the pipeline." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, this argument is passed as ``sample_weight`` keyword argument to the ``score`` method of the final estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply transforms, and score with the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\ny : iterable, default=None\n Targets used for scoring. 
Must fulfill label requirements for all\n steps of the pipeline.\n\nsample_weight : array-like, default=None\n If not None, this argument is passed as ``sample_weight`` keyword\n argument to the ``score`` method of the final estimator.\n\nReturns\n-------\nscore : float" - }, - { - "name": "classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sk_visual_block_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Pipeline of transforms with a final estimator.\n\nSequentially apply a list of transforms and a final estimator.\nIntermediate steps of the pipeline must be 'transforms', that is, they\nmust implement fit and transform methods.\nThe final estimator only needs to implement fit.\nThe transformers in the pipeline can be cached using ``memory`` argument.\n\nThe purpose of the pipeline is to assemble several steps that can be\ncross-validated together while setting different parameters.\nFor this, it enables setting parameters of the various steps using their\nnames and the parameter name separated by a '__', as in the example below.\nA step's estimator may be replaced entirely by setting the parameter\nwith its name to another estimator, or a transformer removed by setting\nit to 'passthrough' or ``None``.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.5\n\nParameters\n----------\nsteps : list\n List of (name, transform) tuples (implementing fit/transform) that are\n chained, in the order in which they are chained, with the last object\n an estimator.\n\nmemory : str or object with the joblib.Memory interface, default=None\n Used to cache the fitted transformers of the pipeline. By default,\n no caching is performed. If a string is given, it is the path to\n the caching directory. Enabling caching triggers a clone of\n the transformers before fitting. Therefore, the transformer\n instance given to the pipeline cannot be inspected\n directly. Use the attribute ``named_steps`` or ``steps`` to\n inspect estimators within the pipeline. Caching the\n transformers is advantageous when fitting is time consuming.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each step will be printed as it\n is completed.\n\nAttributes\n----------\nnamed_steps : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n Read-only attribute to access any step parameter by user given name.\n Keys are step names and values are steps parameters.\n\nSee Also\n--------\nmake_pipeline : Convenience function for simplified pipeline construction.\n\nExamples\n--------\n>>> from sklearn.svm import SVC\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.pipeline import Pipeline\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n... 
random_state=0)\n>>> pipe = Pipeline([('scaler', StandardScaler()), ('svc', SVC())])\n>>> # The pipeline can be used as any other estimator\n>>> # and avoids leaking the test set into the train set\n>>> pipe.fit(X_train, y_train)\nPipeline(steps=[('scaler', StandardScaler()), ('svc', SVC())])\n>>> pipe.score(X_test, y_test)\n0.88" - }, - { - "name": "FeatureUnion", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "transformer_list", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of transformer objects to be applied to the data. The first half of each tuple is the name of the transformer. The tranformer can be 'drop' for it to be ignored. .. versionchanged:: 0.22 Deprecated `None` as a transformer in favor of 'drop'." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionchanged:: v0.20 `n_jobs` default changed from 1 to None" - }, - { - "name": "transformer_weights", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multiplicative weights for features per transformer. Keys are transformer names, values the weights. Raises ValueError if key not present in ``transformer_list``." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the time elapsed while fitting each transformer will be printed as it is completed." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Get parameters for this estimator.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `transformer_list` of the\n`FeatureUnion`.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : mapping of string to any\n Parameter names mapped to their values." - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. Note that\nyou can directly set the parameters of the estimators contained in\n`tranformer_list`.\n\nReturns\n-------\nself" - }, - { - "name": "_validate_transformers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_transformer_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate (name, trans, weight) tuples excluding None and\n'drop' transformers." 
- }, - { - "name": "get_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get feature names from all transformers.\n\nReturns\n-------\nfeature_names : list of strings\n Names of the features produced by transform." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, used to fit transformers." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets for supervised learning." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit all transformers using X.\n\nParameters\n----------\nX : iterable or array-like, depending on transformers\n Input data, used to fit transformers.\n\ny : array-like of shape (n_samples, n_outputs), default=None\n Targets for supervised learning.\n\nReturns\n-------\nself : FeatureUnion\n This estimator" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data to be transformed." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets for supervised learning." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit all transformers, transform the data and concatenate results.\n\nParameters\n----------\nX : iterable or array-like, depending on transformers\n Input data to be transformed.\n\ny : array-like of shape (n_samples, n_outputs), default=None\n Targets for supervised learning.\n\nReturns\n-------\nX_t : array-like or sparse matrix of shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers." - }, - { - "name": "_log_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_parallel_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Runs func in parallel on X and y" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data to be transformed." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X separately by each transformer, concatenate results.\n\nParameters\n----------\nX : iterable or array-like, depending on transformers\n Input data to be transformed.\n\nReturns\n-------\nX_t : array-like or sparse matrix of shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers." 
- }, - { - "name": "_hstack", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_update_transformer_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sk_visual_block_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Concatenates results of multiple transformer objects.\n\nThis estimator applies a list of transformer objects in parallel to the\ninput data, then concatenates the results. This is useful to combine\nseveral feature extraction mechanisms into a single transformer.\n\nParameters of the transformers may be set using its name and the parameter\nname separated by a '__'. A transformer may be replaced entirely by\nsetting the parameter with its name to another transformer,\nor removed by setting to 'drop'.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\ntransformer_list : list of (string, transformer) tuples\n List of transformer objects to be applied to the data. The first\n half of each tuple is the name of the transformer. The tranformer can\n be 'drop' for it to be ignored.\n\n .. versionchanged:: 0.22\n Deprecated `None` as a transformer in favor of 'drop'.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. 
versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\ntransformer_weights : dict, default=None\n Multiplicative weights for features per transformer.\n Keys are transformer names, values the weights.\n Raises ValueError if key not present in ``transformer_list``.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\nSee Also\n--------\nmake_union : Convenience function for simplified feature union\n construction.\n\nExamples\n--------\n>>> from sklearn.pipeline import FeatureUnion\n>>> from sklearn.decomposition import PCA, TruncatedSVD\n>>> union = FeatureUnion([(\"pca\", PCA(n_components=1)),\n... (\"svd\", TruncatedSVD(n_components=2))])\n>>> X = [[0., 1., 3], [2., 2., 5]]\n>>> union.fit_transform(X)\narray([[ 1.5 , 3.0..., 0.8...],\n [-1.5 , 5.7..., -0.4...]])" - } - ], - "functions": [ - { - "name": "_name_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate names for estimators." - }, - { - "name": "make_pipeline", - "decorators": [], - "parameters": [ - { - "name": "*steps", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "memory", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to cache the fitted transformers of the pipeline. By default, no caching is performed. If a string is given, it is the path to the caching directory. Enabling caching triggers a clone of the transformers before fitting. Therefore, the transformer instance given to the pipeline cannot be inspected directly. Use the attribute ``named_steps`` or ``steps`` to inspect estimators within the pipeline. Caching the transformers is advantageous when fitting is time consuming." 
- }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the time elapsed while fitting each step will be printed as it is completed." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Construct a Pipeline from the given estimators.\n\nThis is a shorthand for the Pipeline constructor; it does not require, and\ndoes not permit, naming the estimators. Instead, their names will be set\nto the lowercase of their types automatically.\n\nParameters\n----------\n*steps : list of estimators.\n\nmemory : str or object with the joblib.Memory interface, default=None\n Used to cache the fitted transformers of the pipeline. By default,\n no caching is performed. If a string is given, it is the path to\n the caching directory. Enabling caching triggers a clone of\n the transformers before fitting. Therefore, the transformer\n instance given to the pipeline cannot be inspected\n directly. Use the attribute ``named_steps`` or ``steps`` to\n inspect estimators within the pipeline. 
Caching the\n transformers is advantageous when fitting is time consuming.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each step will be printed as it\n is completed.\n\nSee Also\n--------\nPipeline : Class for creating a pipeline of transforms with a final\n estimator.\n\nExamples\n--------\n>>> from sklearn.naive_bayes import GaussianNB\n>>> from sklearn.preprocessing import StandardScaler\n>>> make_pipeline(StandardScaler(), GaussianNB(priors=None))\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('gaussiannb', GaussianNB())])\n\nReturns\n-------\np : Pipeline" - }, - { - "name": "_transform_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_transform_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fits ``transformer`` to ``X`` and ``y``. The transformed result is returned\nwith the fitted transformer. If ``weight`` is not ``None``, the result will\nbe multiplied by ``weight``." - }, - { - "name": "_fit_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fits ``transformer`` to ``X`` and ``y``." - }, - { - "name": "make_union", - "decorators": [], - "parameters": [ - { - "name": "*transformers", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. 
versionchanged:: v0.20 `n_jobs` default changed from 1 to None" - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the time elapsed while fitting each transformer will be printed as it is completed." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Construct a FeatureUnion from the given transformers.\n\nThis is a shorthand for the FeatureUnion constructor; it does not require,\nand does not permit, naming the transformers. Instead, they will be given\nnames automatically based on their types. It also does not allow weighting.\n\nParameters\n----------\n*transformers : list of estimators\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\nReturns\n-------\nf : FeatureUnion\n\nSee Also\n--------\nFeatureUnion : Class for concatenating the results of multiple transformer\n objects.\n\nExamples\n--------\n>>> from sklearn.decomposition import PCA, TruncatedSVD\n>>> from sklearn.pipeline import make_union\n>>> make_union(PCA(), TruncatedSVD())\n FeatureUnion(transformer_list=[('pca', PCA()),\n ('truncatedsvd', TruncatedSVD())])" - } - ] - }, - { - "name": "sklearn.random_projection", - "imports": [ - "import warnings", - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numpy as np", - "import scipy.sparse as sp", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_random_state", - "from utils.extmath import safe_sparse_dot", - "from utils.random import sample_without_replacement", - "from 
utils.validation import check_array", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from exceptions import DataDimensionalityWarning" - ], - "classes": [ - { - "name": "BaseRandomProjection", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_random_matrix", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the target projection space." - }, - { - "name": "n_features", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the original source space." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate the random projection matrix.\n\nParameters\n----------\nn_components : int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\nReturns\n-------\ncomponents : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated random matrix. Sparse matrix will be of CSR format." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training set: only the shape is used to find optimal random matrix dimensions based on the theory referenced in the afore mentioned papers." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Generate a sparse random projection matrix.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training set: only the shape is used to find optimal random\n matrix dimensions based on the theory referenced in the\n afore mentioned papers.\n\ny\n Ignored\n\nReturns\n-------\nself" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data to project into a smaller dimensional space." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Project the data by using matrix product with the random matrix\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input data to project into a smaller dimensional space.\n\nReturns\n-------\nX_new : {ndarray, sparse matrix} of shape (n_samples, n_components)\n Projected array." - } - ], - "docstring": "Base class for random projections.\n\nWarning: This class should not be used directly.\nUse derived classes instead." - }, - { - "name": "GaussianRandomProjection", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "Union[Literal['auto'], int]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the target projection space. n_components can be automatically adjusted according to the number of samples in the dataset and the bound given by the Johnson-Lindenstrauss lemma. In that case the quality of the embedding is controlled by the ``eps`` parameter. 
It should be noted that Johnson-Lindenstrauss lemma can yield very conservative estimated of the required number of components as it makes no assumption on the structure of the dataset." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Parameter to control the quality of the embedding according to the Johnson-Lindenstrauss lemma when `n_components` is set to 'auto'. The value should be strictly positive. Smaller values lead to better embedding and higher number of dimensions (n_components) in the target projection space." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generator used to generate the projection matrix at fit time. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_random_matrix", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the target projection space." - }, - { - "name": "n_features", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the original source space." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate the random projection matrix.\n\nParameters\n----------\nn_components : int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\nReturns\n-------\ncomponents : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated random matrix. Sparse matrix will be of CSR format." 
- } - ], - "docstring": "Reduce dimensionality through Gaussian random projection.\n\nThe components of the random matrix are drawn from N(0, 1 / n_components).\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nn_components : int or 'auto', default='auto'\n Dimensionality of the target projection space.\n\n n_components can be automatically adjusted according to the\n number of samples in the dataset and the bound given by the\n Johnson-Lindenstrauss lemma. In that case the quality of the\n embedding is controlled by the ``eps`` parameter.\n\n It should be noted that Johnson-Lindenstrauss lemma can yield\n very conservative estimated of the required number of components\n as it makes no assumption on the structure of the dataset.\n\neps : float, default=0.1\n Parameter to control the quality of the embedding according to\n the Johnson-Lindenstrauss lemma when `n_components` is set to\n 'auto'. The value should be strictly positive.\n\n Smaller values lead to better embedding and higher number of\n dimensions (n_components) in the target projection space.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the\n projection matrix at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nn_components_ : int\n Concrete number of components computed when n_components=\"auto\".\n\ncomponents_ : ndarray of shape (n_components, n_features)\n Random matrix used for the projection.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.random_projection import GaussianRandomProjection\n>>> rng = np.random.RandomState(42)\n>>> X = rng.rand(100, 10000)\n>>> transformer = GaussianRandomProjection(random_state=rng)\n>>> X_new = transformer.fit_transform(X)\n>>> X_new.shape\n(100, 3947)\n\nSee Also\n--------\nSparseRandomProjection" - }, - { - "name": "SparseRandomProjection", 
- "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "Union[Literal['auto'], int]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the target projection space. n_components can be automatically adjusted according to the number of samples in the dataset and the bound given by the Johnson-Lindenstrauss lemma. In that case the quality of the embedding is controlled by the ``eps`` parameter. It should be noted that Johnson-Lindenstrauss lemma can yield very conservative estimated of the required number of components as it makes no assumption on the structure of the dataset." - }, - { - "name": "density", - "type": "Union[Literal['auto'], float]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Ratio in the range (0, 1] of non-zero component in the random projection matrix. If density = 'auto', the value is set to the minimum density as recommended by Ping Li et al.: 1 / sqrt(n_features). Use density = 1 / 3.0 if you want to reproduce the results from Achlioptas, 2001." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Parameter to control the quality of the embedding according to the Johnson-Lindenstrauss lemma when n_components is set to 'auto'. This value should be strictly positive. Smaller values lead to better embedding and higher number of dimensions (n_components) in the target projection space." - }, - { - "name": "dense_output", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, ensure that the output of the random projection is a dense numpy array even if the input and random projection matrix are both sparse. 
In practice, if the number of components is small the number of zero components in the projected data will be very small and it will be more CPU and memory efficient to use a dense representation. If False, the projected data uses a sparse representation if the input is sparse." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generator used to generate the projection matrix at fit time. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_random_matrix", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the target projection space." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the original source space." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate the random projection matrix\n\nParameters\n----------\nn_components : int\n Dimensionality of the target projection space.\n\nn_features : int\n Dimensionality of the original source space.\n\nReturns\n-------\ncomponents : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated random matrix. Sparse matrix will be of CSR format." 
- } - ], - "docstring": "Reduce dimensionality through sparse random projection.\n\nSparse random matrix is an alternative to dense random\nprojection matrix that guarantees similar embedding quality while being\nmuch more memory efficient and allowing faster computation of the\nprojected data.\n\nIf we note `s = 1 / density` the components of the random matrix are\ndrawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nn_components : int or 'auto', default='auto'\n Dimensionality of the target projection space.\n\n n_components can be automatically adjusted according to the\n number of samples in the dataset and the bound given by the\n Johnson-Lindenstrauss lemma. In that case the quality of the\n embedding is controlled by the ``eps`` parameter.\n\n It should be noted that Johnson-Lindenstrauss lemma can yield\n very conservative estimated of the required number of components\n as it makes no assumption on the structure of the dataset.\n\ndensity : float or 'auto', default='auto'\n Ratio in the range (0, 1] of non-zero component in the random\n projection matrix.\n\n If density = 'auto', the value is set to the minimum density\n as recommended by Ping Li et al.: 1 / sqrt(n_features).\n\n Use density = 1 / 3.0 if you want to reproduce the results from\n Achlioptas, 2001.\n\neps : float, default=0.1\n Parameter to control the quality of the embedding according to\n the Johnson-Lindenstrauss lemma when n_components is set to\n 'auto'. 
This value should be strictly positive.\n\n Smaller values lead to better embedding and higher number of\n dimensions (n_components) in the target projection space.\n\ndense_output : bool, default=False\n If True, ensure that the output of the random projection is a\n dense numpy array even if the input and random projection matrix\n are both sparse. In practice, if the number of components is\n small the number of zero components in the projected data will\n be very small and it will be more CPU and memory efficient to\n use a dense representation.\n\n If False, the projected data uses a sparse representation if\n the input is sparse.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the\n projection matrix at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nn_components_ : int\n Concrete number of components computed when n_components=\"auto\".\n\ncomponents_ : sparse matrix of shape (n_components, n_features)\n Random matrix used for the projection. Sparse matrix will be of CSR\n format.\n\ndensity_ : float in range 0.0 - 1.0\n Concrete density computed from when density = \"auto\".\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.random_projection import SparseRandomProjection\n>>> rng = np.random.RandomState(42)\n>>> X = rng.rand(100, 10000)\n>>> transformer = SparseRandomProjection(random_state=rng)\n>>> X_new = transformer.fit_transform(X)\n>>> X_new.shape\n(100, 3947)\n>>> # very few components are non-zero\n>>> np.mean(transformer.components_ != 0)\n0.0100...\n\nSee Also\n--------\nGaussianRandomProjection\n\nReferences\n----------\n\n.. [1] Ping Li, T. Hastie and K. W. Church, 2006,\n \"Very Sparse Random Projections\".\n https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf\n\n.. [2] D. 
Achlioptas, 2001, \"Database-friendly random projections\",\n https://users.soe.ucsc.edu/~optas/papers/jl.pdf" - } - ], - "functions": [ - { - "name": "johnson_lindenstrauss_min_dim", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "Union[ArrayLike, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples that should be a integer greater than 0. If an array is given, it will compute a safe number of components array-wise." - }, - { - "name": "eps", - "type": "Union[NDArray, float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum distortion rate in the range (0,1 ) as defined by the Johnson-Lindenstrauss lemma. If an array is given, it will compute a safe number of components array-wise." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find a 'safe' number of components to randomly project to.\n\nThe distortion introduced by a random projection `p` only changes the\ndistance between two points by a factor (1 +- eps) in an euclidean space\nwith good probability. 
The projection `p` is an eps-embedding as defined\nby:\n\n (1 - eps) ||u - v||^2 < ||p(u) - p(v)||^2 < (1 + eps) ||u - v||^2\n\nWhere u and v are any rows taken from a dataset of shape (n_samples,\nn_features), eps is in ]0, 1[ and p is a projection by a random Gaussian\nN(0, 1) matrix of shape (n_components, n_features) (or a sparse\nAchlioptas matrix).\n\nThe minimum number of components to guarantee the eps-embedding is\ngiven by:\n\n n_components >= 4 log(n_samples) / (eps^2 / 2 - eps^3 / 3)\n\nNote that the number of dimensions is independent of the original\nnumber of features but instead depends on the size of the dataset:\nthe larger the dataset, the higher is the minimal dimensionality of\nan eps-embedding.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int or array-like of int\n Number of samples that should be a integer greater than 0. If an array\n is given, it will compute a safe number of components array-wise.\n\neps : float or ndarray of shape (n_components,), dtype=float, default=0.1\n Maximum distortion rate in the range (0,1 ) as defined by the\n Johnson-Lindenstrauss lemma. If an array is given, it will compute a\n safe number of components array-wise.\n\nReturns\n-------\nn_components : int or ndarray of int\n The minimal number of components to guarantee with good probability\n an eps-embedding with n_samples.\n\nExamples\n--------\n\n>>> johnson_lindenstrauss_min_dim(1e6, eps=0.5)\n663\n\n>>> johnson_lindenstrauss_min_dim(1e6, eps=[0.5, 0.1, 0.01])\narray([ 663, 11841, 1112658])\n\n>>> johnson_lindenstrauss_min_dim([1e4, 1e5, 1e6], eps=0.1)\narray([ 7894, 9868, 11841])\n\nReferences\n----------\n\n.. [1] https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma\n\n.. 
[2] Sanjoy Dasgupta and Anupam Gupta, 1999,\n \"An elementary proof of the Johnson-Lindenstrauss Lemma.\"\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.45.3654" - }, - { - "name": "_check_density", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Factorize density check according to Li et al." - }, - { - "name": "_check_input_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Factorize argument checking for random matrix generation." - }, - { - "name": "_gaussian_random_matrix", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the target projection space." - }, - { - "name": "n_features", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the original source space." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generator used to generate the matrix at fit time. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a dense Gaussian random matrix.\n\nThe components of the random matrix are drawn from\n\n N(0, 1.0 / n_components).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the matrix\n at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ncomponents : ndarray of shape (n_components, n_features)\n The generated Gaussian random matrix.\n\nSee Also\n--------\nGaussianRandomProjection" - }, - { - "name": "_sparse_random_matrix", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the target projection space." - }, - { - "name": "n_features", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of the original source space." - }, - { - "name": "density", - "type": "Union[Literal['auto'], float]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Ratio of non-zero component in the random projection matrix in the range `(0, 1]` If density = 'auto', the value is set to the minimum density as recommended by Ping Li et al.: 1 / sqrt(n_features). Use density = 1 / 3.0 if you want to reproduce the results from Achlioptas, 2001." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generator used to generate the matrix at fit time. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generalized Achlioptas random sparse matrix for random projection.\n\nSetting density to 1 / 3 will yield the original matrix by Dimitris\nAchlioptas while setting a lower value will yield the generalization\nby Ping Li et al.\n\nIf we note :math:`s = 1 / density`, the components of the random matrix are\ndrawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\ndensity : float or 'auto', default='auto'\n Ratio of non-zero component in the random projection matrix in the\n range `(0, 1]`\n\n If density = 'auto', the value is set to the minimum density\n as recommended by Ping Li et al.: 1 / sqrt(n_features).\n\n Use density = 1 / 3.0 if you want to reproduce the results from\n Achlioptas, 2001.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the matrix\n at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ncomponents : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated Gaussian random matrix. Sparse matrix will be of CSR\n format.\n\nSee Also\n--------\nSparseRandomProjection\n\nReferences\n----------\n\n.. [1] Ping Li, T. Hastie and K. W. 
Church, 2006,\n \"Very Sparse Random Projections\".\n https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf\n\n.. [2] D. Achlioptas, 2001, \"Database-friendly random projections\",\n http://www.cs.ucsc.edu/~optas/papers/jl.pdf" - } - ] - }, - { - "name": "sklearn.setup", - "imports": [ - "import sys", - "import os", - "from sklearn._build_utils import cythonize_extensions", - "from numpy.distutils.misc_util import Configuration", - "import numpy", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn._config", - "imports": [ - "import os", - "from contextlib import contextmanager as contextmanager" - ], - "classes": [], - "functions": [ - { - "name": "get_config", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Retrieve current values for configuration set by :func:`set_config`\n\nReturns\n-------\nconfig : dict\n Keys are parameter names that can be passed to :func:`set_config`.\n\nSee Also\n--------\nconfig_context : Context manager for global scikit-learn configuration.\nset_config : Set global scikit-learn configuration." - }, - { - "name": "set_config", - "decorators": [], - "parameters": [ - { - "name": "assume_finite", - "type": "bool", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If True, validation for finiteness will be skipped, saving time, but leading to potential crashes. If False, validation for finiteness will be performed, avoiding error. Global default: False. .. 
versionadded:: 0.19" - }, - { - "name": "working_memory", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If set, scikit-learn will attempt to limit the size of temporary arrays to this number of MiB (per job when parallelised), often saving both computation time and memory on expensive operations that can be performed in chunks. Global default: 1024. .. versionadded:: 0.20" - }, - { - "name": "print_changed_only", - "type": "bool", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If True, only the parameters that were set to non-default values will be printed when printing an estimator. For example, ``print(SVC())`` while True will only print 'SVC()' while the default behaviour would be to print 'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters. .. versionadded:: 0.21" - }, - { - "name": "display", - "type": "Literal['text', 'diagram']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If 'diagram', estimators will be displayed as a diagram in a Jupyter lab or notebook context. If 'text', estimators will be displayed as text. Default is 'text'. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set global scikit-learn configuration\n\n.. versionadded:: 0.19\n\nParameters\n----------\nassume_finite : bool, default=None\n If True, validation for finiteness will be skipped,\n saving time, but leading to potential crashes. If\n False, validation for finiteness will be performed,\n avoiding error. Global default: False.\n\n .. versionadded:: 0.19\n\nworking_memory : int, default=None\n If set, scikit-learn will attempt to limit the size of temporary arrays\n to this number of MiB (per job when parallelised), often saving both\n computation time and memory on expensive operations that can be\n performed in chunks. 
Global default: 1024.\n\n .. versionadded:: 0.20\n\nprint_changed_only : bool, default=None\n If True, only the parameters that were set to non-default\n values will be printed when printing an estimator. For example,\n ``print(SVC())`` while True will only print 'SVC()' while the default\n behaviour would be to print 'SVC(C=1.0, cache_size=200, ...)' with\n all the non-changed parameters.\n\n .. versionadded:: 0.21\n\ndisplay : {'text', 'diagram'}, default=None\n If 'diagram', estimators will be displayed as a diagram in a Jupyter\n lab or notebook context. If 'text', estimators will be displayed as\n text. Default is 'text'.\n\n .. versionadded:: 0.23\n\nSee Also\n--------\nconfig_context : Context manager for global scikit-learn configuration.\nget_config : Retrieve current values of the global configuration." - }, - { - "name": "config_context", - "decorators": [], - "parameters": [ - { - "name": "assume_finite", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, validation for finiteness will be skipped, saving time, but leading to potential crashes. If False, validation for finiteness will be performed, avoiding error. Global default: False." - }, - { - "name": "working_memory", - "type": "int", - "hasDefault": true, - "default": "1024", - "limitation": null, - "ignored": false, - "docstring": "If set, scikit-learn will attempt to limit the size of temporary arrays to this number of MiB (per job when parallelised), often saving both computation time and memory on expensive operations that can be performed in chunks. Global default: 1024." - }, - { - "name": "print_changed_only", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, only the parameters that were set to non-default values will be printed when printing an estimator. 
For example, ``print(SVC())`` while True will only print 'SVC()', but would print 'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters when False. Default is True. .. versionchanged:: 0.23 Default changed from False to True." - }, - { - "name": "display", - "type": "Literal['text', 'diagram']", - "hasDefault": true, - "default": "'text'", - "limitation": null, - "ignored": false, - "docstring": "If 'diagram', estimators will be displayed as a diagram in a Jupyter lab or notebook context. If 'text', estimators will be displayed as text. Default is 'text'. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Context manager for global scikit-learn configuration\n\nParameters\n----------\nassume_finite : bool, default=False\n If True, validation for finiteness will be skipped,\n saving time, but leading to potential crashes. If\n False, validation for finiteness will be performed,\n avoiding error. Global default: False.\n\nworking_memory : int, default=1024\n If set, scikit-learn will attempt to limit the size of temporary arrays\n to this number of MiB (per job when parallelised), often saving both\n computation time and memory on expensive operations that can be\n performed in chunks. Global default: 1024.\n\nprint_changed_only : bool, default=True\n If True, only the parameters that were set to non-default\n values will be printed when printing an estimator. For example,\n ``print(SVC())`` while True will only print 'SVC()', but would print\n 'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters\n when False. Default is True.\n\n .. versionchanged:: 0.23\n Default changed from False to True.\n\ndisplay : {'text', 'diagram'}, default='text'\n If 'diagram', estimators will be displayed as a diagram in a Jupyter\n lab or notebook context. If 'text', estimators will be displayed as\n text. Default is 'text'.\n\n .. 
versionadded:: 0.23\n\nNotes\n-----\nAll settings, not just those presently modified, will be returned to\ntheir previous values when the context manager is exited. This is not\nthread-safe.\n\nExamples\n--------\n>>> import sklearn\n>>> from sklearn.utils.validation import assert_all_finite\n>>> with sklearn.config_context(assume_finite=True):\n... assert_all_finite([float('nan')])\n>>> with sklearn.config_context(assume_finite=True):\n... with sklearn.config_context(assume_finite=False):\n... assert_all_finite([float('nan')])\nTraceback (most recent call last):\n...\nValueError: Input contains NaN, ...\n\nSee Also\n--------\nset_config : Set global scikit-learn configuration.\nget_config : Retrieve current values of the global configuration." - } - ] - }, - { - "name": "sklearn._distributor_init", - "imports": [ - "import os", - "import os.path as op", - "from ctypes import WinDLL" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn._min_dependencies", - "imports": [ - "import platform", - "import argparse" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn", - "imports": [ - "import sys", - "import logging", - "import os", - "import random", - "from _config import get_config", - "from _config import set_config", - "from _config import config_context", - "from None import _distributor_init", - "from None import __check_build", - "from base import clone", - "from utils._show_versions import show_versions", - "import numpy as np" - ], - "classes": [], - "functions": [ - { - "name": "setup_module", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fixture for the tests to assure globally controllable seeding of RNGs" - } - ] - }, - { - "name": "sklearn.cluster.setup", - "imports": [ - "import os", - "import numpy", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": 
"configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster._affinity_propagation", - "imports": [ - "import numpy as np", - "import warnings", - "from exceptions import ConvergenceWarning", - "from base import BaseEstimator", - "from base import ClusterMixin", - "from utils import as_float_array", - "from utils import check_random_state", - "from utils.deprecation import deprecated", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from metrics import euclidean_distances", - "from metrics import pairwise_distances_argmin", - "from _config import config_context" - ], - "classes": [ - { - "name": "AffinityPropagation", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "damping", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Damping factor (between 0.5 and 1) is the extent to which the current value is maintained relative to incoming values (weighted 1 - damping). This in order to avoid numerical oscillations when updating these values (messages)." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations." - }, - { - "name": "convergence_iter", - "type": "int", - "hasDefault": true, - "default": "15", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations with no change in the number of estimated clusters that stops the convergence." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Make a copy of input data." 
- }, - { - "name": "preference", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Preferences for each point - points with larger values of preferences are more likely to be chosen as exemplars. The number of exemplars, ie of clusters, is influenced by the input preferences value. If the preferences are not passed as arguments, they will be set to the median of the input similarities." - }, - { - "name": "affinity", - "type": "Literal['euclidean', 'precomputed']", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "Which affinity to use. At the moment 'precomputed' and ``euclidean`` are supported. 'euclidean' uses the negative squared euclidean distance between points." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to be verbose." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the starting state. Use an int for reproducible results across function calls. See the :term:`Glossary `. .. versionadded:: 0.23 this parameter was previously hardcoded as 0." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster, or similarities / affinities between instances if ``affinity='precomputed'``. If a sparse feature matrix is provided, it will be converted into a sparse ``csr_matrix``." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the clustering from features, or affinity matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. If a sparse feature matrix\n is provided, it will be converted into a sparse ``csr_matrix``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data to predict. If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the closest cluster each sample in X belongs to.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels." - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster, or similarities / affinities between instances if ``affinity='precomputed'``. If a sparse feature matrix is provided, it will be converted into a sparse ``csr_matrix``." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the clustering from features or affinity matrix, and return\ncluster labels.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. If a sparse feature matrix\n is provided, it will be converted into a sparse ``csr_matrix``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels." - } - ], - "docstring": "Perform Affinity Propagation Clustering of data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndamping : float, default=0.5\n Damping factor (between 0.5 and 1) is the extent to\n which the current value is maintained relative to\n incoming values (weighted 1 - damping). 
This in order\n to avoid numerical oscillations when updating these\n values (messages).\n\nmax_iter : int, default=200\n Maximum number of iterations.\n\nconvergence_iter : int, default=15\n Number of iterations with no change in the number\n of estimated clusters that stops the convergence.\n\ncopy : bool, default=True\n Make a copy of input data.\n\npreference : array-like of shape (n_samples,) or float, default=None\n Preferences for each point - points with larger values of\n preferences are more likely to be chosen as exemplars. The number\n of exemplars, ie of clusters, is influenced by the input\n preferences value. If the preferences are not passed as arguments,\n they will be set to the median of the input similarities.\n\naffinity : {'euclidean', 'precomputed'}, default='euclidean'\n Which affinity to use. At the moment 'precomputed' and\n ``euclidean`` are supported. 'euclidean' uses the\n negative squared euclidean distance between points.\n\nverbose : bool, default=False\n Whether to be verbose.\n\nrandom_state : int, RandomState instance or None, default=0\n Pseudo-random number generator to control the starting state.\n Use an int for reproducible results across function calls.\n See the :term:`Glossary `.\n\n .. 
versionadded:: 0.23\n this parameter was previously hardcoded as 0.\n\nAttributes\n----------\ncluster_centers_indices_ : ndarray of shape (n_clusters,)\n Indices of cluster centers.\n\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n Cluster centers (if affinity != ``precomputed``).\n\nlabels_ : ndarray of shape (n_samples,)\n Labels of each point.\n\naffinity_matrix_ : ndarray of shape (n_samples, n_samples)\n Stores the affinity matrix used in ``fit``.\n\nn_iter_ : int\n Number of iterations taken to converge.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_affinity_propagation.py\n`.\n\nThe algorithmic complexity of affinity propagation is quadratic\nin the number of points.\n\nWhen ``fit`` does not converge, ``cluster_centers_`` becomes an empty\narray and all training samples will be labelled as ``-1``. In addition,\n``predict`` will then label every sample as ``-1``.\n\nWhen all training samples have equal similarities and equal preferences,\nthe assignment of cluster centers and labels depends on the preference.\nIf the preference is smaller than the similarities, ``fit`` will result in\na single cluster center and label ``0`` for every sample. Otherwise, every\ntraining sample becomes its own cluster center and is assigned a unique\nlabel.\n\nReferences\n----------\n\nBrendan J. Frey and Delbert Dueck, \"Clustering by Passing Messages\nBetween Data Points\", Science Feb. 2007\n\nExamples\n--------\n>>> from sklearn.cluster import AffinityPropagation\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... 
[4, 2], [4, 4], [4, 0]])\n>>> clustering = AffinityPropagation(random_state=5).fit(X)\n>>> clustering\nAffinityPropagation(random_state=5)\n>>> clustering.labels_\narray([0, 0, 0, 1, 1, 1])\n>>> clustering.predict([[0, 0], [4, 4]])\narray([0, 1])\n>>> clustering.cluster_centers_\narray([[1, 2],\n [4, 2]])" - } - ], - "functions": [ - { - "name": "_equal_similarities_and_preferences", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "affinity_propagation", - "decorators": [], - "parameters": [ - { - "name": "S", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix of similarities between points." - }, - { - "name": "preference", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Preferences for each point - points with larger values of preferences are more likely to be chosen as exemplars. The number of exemplars, i.e. of clusters, is influenced by the input preferences value. If the preferences are not passed as arguments, they will be set to the median of the input similarities (resulting in a moderate number of clusters). For a smaller amount of clusters, this can be set to the minimum value of the similarities." - }, - { - "name": "convergence_iter", - "type": "int", - "hasDefault": true, - "default": "15", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations with no change in the number of estimated clusters that stops the convergence." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations" - }, - { - "name": "damping", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Damping factor between 0.5 and 1." 
- }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If copy is False, the affinity matrix is modified inplace by the algorithm, for memory efficiency." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the starting state. Use an int for reproducible results across function calls. See the :term:`Glossary `. .. versionadded:: 0.23 this parameter was previously hardcoded as 0." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform Affinity Propagation Clustering of data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nS : array-like of shape (n_samples, n_samples)\n Matrix of similarities between points.\n\npreference : array-like of shape (n_samples,) or float, default=None\n Preferences for each point - points with larger values of\n preferences are more likely to be chosen as exemplars. The number of\n exemplars, i.e. of clusters, is influenced by the input preferences\n value. If the preferences are not passed as arguments, they will be\n set to the median of the input similarities (resulting in a moderate\n number of clusters). 
For a smaller amount of clusters, this can be set\n to the minimum value of the similarities.\n\nconvergence_iter : int, default=15\n Number of iterations with no change in the number\n of estimated clusters that stops the convergence.\n\nmax_iter : int, default=200\n Maximum number of iterations\n\ndamping : float, default=0.5\n Damping factor between 0.5 and 1.\n\ncopy : bool, default=True\n If copy is False, the affinity matrix is modified inplace by the\n algorithm, for memory efficiency.\n\nverbose : bool, default=False\n The verbosity level.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nrandom_state : int, RandomState instance or None, default=0\n Pseudo-random number generator to control the starting state.\n Use an int for reproducible results across function calls.\n See the :term:`Glossary `.\n\n .. versionadded:: 0.23\n this parameter was previously hardcoded as 0.\n\nReturns\n-------\n\ncluster_centers_indices : ndarray of shape (n_clusters,)\n Index of clusters centers.\n\nlabels : ndarray of shape (n_samples,)\n Cluster labels for each point.\n\nn_iter : int\n Number of iterations run. Returned only if `return_n_iter` is\n set to True.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_affinity_propagation.py\n`.\n\nWhen the algorithm does not converge, it returns an empty array as\n``cluster_center_indices`` and ``-1`` as label for each training sample.\n\nWhen all training samples have equal similarities and equal preferences,\nthe assignment of cluster centers and labels depends on the preference.\nIf the preference is smaller than the similarities, a single cluster center\nand label ``0`` for every sample will be returned. Otherwise, every\ntraining sample becomes its own cluster center and is assigned a unique\nlabel.\n\nReferences\n----------\nBrendan J. Frey and Delbert Dueck, \"Clustering by Passing Messages\nBetween Data Points\", Science Feb. 
2007" - } - ] - }, - { - "name": "sklearn.cluster._agglomerative", - "imports": [ - "import warnings", - "from heapq import heapify", - "from heapq import heappop", - "from heapq import heappush", - "from heapq import heappushpop", - "import numpy as np", - "from scipy import sparse", - "from scipy.sparse.csgraph import connected_components", - "from base import BaseEstimator", - "from base import ClusterMixin", - "from metrics.pairwise import paired_distances", - "from metrics.pairwise import pairwise_distances", - "from neighbors import DistanceMetric", - "from neighbors._dist_metrics import METRIC_MAPPING", - "from utils import check_array", - "from utils._fast_dict import IntFloatDict", - "from utils.fixes import _astype_copy_false", - "from utils.validation import _deprecate_positional_args", - "from utils.validation import check_memory", - "from None import _hierarchical_fast as _hierarchical", - "from _feature_agglomeration import AgglomerationTransform", - "from scipy.sparse.csgraph import minimum_spanning_tree", - "from scipy.cluster import hierarchy" - ], - "classes": [ - { - "name": "AgglomerativeClustering", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_clusters", - "type": "Optional[int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of clusters to find. It must be ``None`` if ``distance_threshold`` is not ``None``." - }, - { - "name": "affinity", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\", \"manhattan\", \"cosine\", or \"precomputed\". If linkage is \"ward\", only \"euclidean\" is accepted. If \"precomputed\", a distance matrix (instead of a similarity matrix) is needed as input for the fit method." 
- }, - { - "name": "memory", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to cache the output of the computation of the tree. By default, no caching is done. If a string is given, it is the path to the caching directory." - }, - { - "name": "connectivity", - "type": "Union[Callable, ArrayLike]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Connectivity matrix. Defines for each sample the neighboring samples following a given structure of the data. This can be a connectivity matrix itself or a callable that transforms the data into a connectivity matrix, such as derived from kneighbors_graph. Default is ``None``, i.e, the hierarchical clustering algorithm is unstructured." - }, - { - "name": "compute_full_tree", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Stop early the construction of the tree at ``n_clusters``. This is useful to decrease computation time if the number of clusters is not small compared to the number of samples. This option is useful only when specifying a connectivity matrix. Note also that when varying the number of clusters and using caching, it may be advantageous to compute the full tree. It must be ``True`` if ``distance_threshold`` is not ``None``. By default `compute_full_tree` is \"auto\", which is equivalent to `True` when `distance_threshold` is not `None` or that `n_clusters` is inferior to the maximum between 100 or `0.02 * n_samples`. Otherwise, \"auto\" is equivalent to `False`." - }, - { - "name": "linkage", - "type": "Literal['ward', 'complete', 'average', 'single']", - "hasDefault": true, - "default": "'ward'", - "limitation": null, - "ignored": false, - "docstring": "Which linkage criterion to use. The linkage criterion determines which distance to use between sets of observation. 
The algorithm will merge the pairs of cluster that minimize this criterion. - 'ward' minimizes the variance of the clusters being merged. - 'average' uses the average of the distances of each observation of the two sets. - 'complete' or 'maximum' linkage uses the maximum distances between all observations of the two sets. - 'single' uses the minimum of the distances between all observations of the two sets. .. versionadded:: 0.20 Added the 'single' option" - }, - { - "name": "distance_threshold", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The linkage distance threshold above which, clusters will not be merged. If not ``None``, ``n_clusters`` must be ``None`` and ``compute_full_tree`` must be ``True``. .. versionadded:: 0.21" - }, - { - "name": "compute_distances", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Computes distances between clusters even if `distance_threshold` is not used. This can be used to make dendrogram visualization, but introduces a computational and memory overhead. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster, or distances between instances if ``affinity='precomputed'``." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the hierarchical clustering from features, or distance matrix.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features) or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``affinity='precomputed'``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself" - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster, or distances between instances if ``affinity='precomputed'``." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the hierarchical clustering from features or distance matrix,\nand return cluster labels.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``affinity='precomputed'``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels." - } - ], - "docstring": "Agglomerative Clustering\n\nRecursively merges the pair of clusters that minimally increases\na given linkage distance.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int or None, default=2\n The number of clusters to find. It must be ``None`` if\n ``distance_threshold`` is not ``None``.\n\naffinity : str or callable, default='euclidean'\n Metric used to compute the linkage. 
Can be \"euclidean\", \"l1\", \"l2\",\n \"manhattan\", \"cosine\", or \"precomputed\".\n If linkage is \"ward\", only \"euclidean\" is accepted.\n If \"precomputed\", a distance matrix (instead of a similarity matrix)\n is needed as input for the fit method.\n\nmemory : str or object with the joblib.Memory interface, default=None\n Used to cache the output of the computation of the tree.\n By default, no caching is done. If a string is given, it is the\n path to the caching directory.\n\nconnectivity : array-like or callable, default=None\n Connectivity matrix. Defines for each sample the neighboring\n samples following a given structure of the data.\n This can be a connectivity matrix itself or a callable that transforms\n the data into a connectivity matrix, such as derived from\n kneighbors_graph. Default is ``None``, i.e, the\n hierarchical clustering algorithm is unstructured.\n\ncompute_full_tree : 'auto' or bool, default='auto'\n Stop early the construction of the tree at ``n_clusters``. This is\n useful to decrease computation time if the number of clusters is not\n small compared to the number of samples. This option is useful only\n when specifying a connectivity matrix. Note also that when varying the\n number of clusters and using caching, it may be advantageous to compute\n the full tree. It must be ``True`` if ``distance_threshold`` is not\n ``None``. By default `compute_full_tree` is \"auto\", which is equivalent\n to `True` when `distance_threshold` is not `None` or that `n_clusters`\n is inferior to the maximum between 100 or `0.02 * n_samples`.\n Otherwise, \"auto\" is equivalent to `False`.\n\nlinkage : {'ward', 'complete', 'average', 'single'}, default='ward'\n Which linkage criterion to use. The linkage criterion determines which\n distance to use between sets of observation. 
The algorithm will merge\n the pairs of cluster that minimize this criterion.\n\n - 'ward' minimizes the variance of the clusters being merged.\n - 'average' uses the average of the distances of each observation of\n the two sets.\n - 'complete' or 'maximum' linkage uses the maximum distances between\n all observations of the two sets.\n - 'single' uses the minimum of the distances between all observations\n of the two sets.\n\n .. versionadded:: 0.20\n Added the 'single' option\n\ndistance_threshold : float, default=None\n The linkage distance threshold above which, clusters will not be\n merged. If not ``None``, ``n_clusters`` must be ``None`` and\n ``compute_full_tree`` must be ``True``.\n\n .. versionadded:: 0.21\n\ncompute_distances : bool, default=False\n Computes distances between clusters even if `distance_threshold` is not\n used. This can be used to make dendrogram visualization, but introduces\n a computational and memory overhead.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nn_clusters_ : int\n The number of clusters found by the algorithm. If\n ``distance_threshold=None``, it will be equal to the given\n ``n_clusters``.\n\nlabels_ : ndarray of shape (n_samples)\n cluster labels for each point\n\nn_leaves_ : int\n Number of leaves in the hierarchical tree.\n\nn_connected_components_ : int\n The estimated number of connected components in the graph.\n\n .. versionadded:: 0.21\n ``n_connected_components_`` was added to replace ``n_components_``.\n\nchildren_ : array-like of shape (n_samples-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. 
Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\ndistances_ : array-like of shape (n_nodes-1,)\n Distances between nodes in the corresponding place in `children_`.\n Only computed if `distance_threshold` is used or `compute_distances`\n is set to `True`.\n\nExamples\n--------\n>>> from sklearn.cluster import AgglomerativeClustering\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... [4, 2], [4, 4], [4, 0]])\n>>> clustering = AgglomerativeClustering().fit(X)\n>>> clustering\nAgglomerativeClustering()\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])" - }, - { - "name": "FeatureAgglomeration", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of clusters to find. It must be ``None`` if ``distance_threshold`` is not ``None``." - }, - { - "name": "affinity", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\", \"manhattan\", \"cosine\", or 'precomputed'. If linkage is \"ward\", only \"euclidean\" is accepted." - }, - { - "name": "memory", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to cache the output of the computation of the tree. By default, no caching is done. If a string is given, it is the path to the caching directory." - }, - { - "name": "connectivity", - "type": "Union[Callable, ArrayLike]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Connectivity matrix. Defines for each feature the neighboring features following a given structure of the data. 
This can be a connectivity matrix itself or a callable that transforms the data into a connectivity matrix, such as derived from kneighbors_graph. Default is None, i.e, the hierarchical clustering algorithm is unstructured." - }, - { - "name": "compute_full_tree", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Stop early the construction of the tree at n_clusters. This is useful to decrease computation time if the number of clusters is not small compared to the number of features. This option is useful only when specifying a connectivity matrix. Note also that when varying the number of clusters and using caching, it may be advantageous to compute the full tree. It must be ``True`` if ``distance_threshold`` is not ``None``. By default `compute_full_tree` is \"auto\", which is equivalent to `True` when `distance_threshold` is not `None` or that `n_clusters` is inferior to the maximum between 100 or `0.02 * n_samples`. Otherwise, \"auto\" is equivalent to `False`." - }, - { - "name": "linkage", - "type": "Literal['ward', 'complete', 'average', 'single']", - "hasDefault": true, - "default": "'ward'", - "limitation": null, - "ignored": false, - "docstring": "Which linkage criterion to use. The linkage criterion determines which distance to use between sets of features. The algorithm will merge the pairs of cluster that minimize this criterion. - ward minimizes the variance of the clusters being merged. - average uses the average of the distances of each feature of the two sets. - complete or maximum linkage uses the maximum distances between all features of the two sets. - single uses the minimum of the distances between all observations of the two sets." 
- }, - { - "name": "pooling_func", - "type": "Callable", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "This combines the values of agglomerated features into a single value, and should accept an array of shape [M, N] and the keyword argument `axis=1`, and reduce it to an array of size [M]." - }, - { - "name": "distance_threshold", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The linkage distance threshold above which, clusters will not be merged. If not ``None``, ``n_clusters`` must be ``None`` and ``compute_full_tree`` must be ``True``. .. versionadded:: 0.21" - }, - { - "name": "compute_distances", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Computes distances between clusters even if `distance_threshold` is not used. This can be used to make dendrogram visualization, but introduces a computational and memory overhead. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the hierarchical clustering on the data\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data\n\ny : Ignored\n\nReturns\n-------\nself" - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Agglomerate features.\n\nSimilar to AgglomerativeClustering, but recursively merges features\ninstead of samples.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int, default=2\n The number of clusters to find. It must be ``None`` if\n ``distance_threshold`` is not ``None``.\n\naffinity : str or callable, default='euclidean'\n Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n \"manhattan\", \"cosine\", or 'precomputed'.\n If linkage is \"ward\", only \"euclidean\" is accepted.\n\nmemory : str or object with the joblib.Memory interface, default=None\n Used to cache the output of the computation of the tree.\n By default, no caching is done. If a string is given, it is the\n path to the caching directory.\n\nconnectivity : array-like or callable, default=None\n Connectivity matrix. Defines for each feature the neighboring\n features following a given structure of the data.\n This can be a connectivity matrix itself or a callable that transforms\n the data into a connectivity matrix, such as derived from\n kneighbors_graph. 
Default is None, i.e, the\n hierarchical clustering algorithm is unstructured.\n\ncompute_full_tree : 'auto' or bool, default='auto'\n Stop early the construction of the tree at n_clusters. This is useful\n to decrease computation time if the number of clusters is not small\n compared to the number of features. This option is useful only when\n specifying a connectivity matrix. Note also that when varying the\n number of clusters and using caching, it may be advantageous to compute\n the full tree. It must be ``True`` if ``distance_threshold`` is not\n ``None``. By default `compute_full_tree` is \"auto\", which is equivalent\n to `True` when `distance_threshold` is not `None` or that `n_clusters`\n is inferior to the maximum between 100 or `0.02 * n_samples`.\n Otherwise, \"auto\" is equivalent to `False`.\n\nlinkage : {'ward', 'complete', 'average', 'single'}, default='ward'\n Which linkage criterion to use. The linkage criterion determines which\n distance to use between sets of features. The algorithm will merge\n the pairs of cluster that minimize this criterion.\n\n - ward minimizes the variance of the clusters being merged.\n - average uses the average of the distances of each feature of\n the two sets.\n - complete or maximum linkage uses the maximum distances between\n all features of the two sets.\n - single uses the minimum of the distances between all observations\n of the two sets.\n\npooling_func : callable, default=np.mean\n This combines the values of agglomerated features into a single\n value, and should accept an array of shape [M, N] and the keyword\n argument `axis=1`, and reduce it to an array of size [M].\n\ndistance_threshold : float, default=None\n The linkage distance threshold above which, clusters will not be\n merged. If not ``None``, ``n_clusters`` must be ``None`` and\n ``compute_full_tree`` must be ``True``.\n\n .. 
versionadded:: 0.21\n\ncompute_distances : bool, default=False\n Computes distances between clusters even if `distance_threshold` is not\n used. This can be used to make dendrogram visualization, but introduces\n a computational and memory overhead.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nn_clusters_ : int\n The number of clusters found by the algorithm. If\n ``distance_threshold=None``, it will be equal to the given\n ``n_clusters``.\n\nlabels_ : array-like of (n_features,)\n cluster labels for each feature.\n\nn_leaves_ : int\n Number of leaves in the hierarchical tree.\n\nn_connected_components_ : int\n The estimated number of connected components in the graph.\n\n .. versionadded:: 0.21\n ``n_connected_components_`` was added to replace ``n_components_``.\n\nchildren_ : array-like of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_features`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_features` is a non-leaf\n node and has children `children_[i - n_features]`. 
Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_features + i`\n\ndistances_ : array-like of shape (n_nodes-1,)\n Distances between nodes in the corresponding place in `children_`.\n Only computed if `distance_threshold` is used or `compute_distances`\n is set to `True`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets, cluster\n>>> digits = datasets.load_digits()\n>>> images = digits.images\n>>> X = np.reshape(images, (len(images), -1))\n>>> agglo = cluster.FeatureAgglomeration(n_clusters=32)\n>>> agglo.fit(X)\nFeatureAgglomeration(n_clusters=32)\n>>> X_reduced = agglo.transform(X)\n>>> X_reduced.shape\n(1797, 32)" - } - ], - "functions": [ - { - "name": "_fix_connectivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fixes the connectivity matrix\n\n - copies it\n - makes it symmetric\n - converts it to LIL if necessary\n - completes it if necessary" - }, - { - "name": "_single_linkage_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform single linkage clustering on sparse data via the minimum\nspanning tree from scipy.sparse.csgraph, then using union-find to label.\nThe parent array is then generated by walking through the tree." - }, - { - "name": "ward_tree", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "feature matrix representing n_samples samples to be clustered" - }, - { - "name": "connectivity", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "connectivity matrix. Defines for each sample the neighboring samples following a given structure of the data. The matrix is assumed to be symmetric and only the upper triangular half is used. 
Default is None, i.e, the Ward algorithm is unstructured." - }, - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Stop early the construction of the tree at n_clusters. This is useful to decrease computation time if the number of clusters is not small compared to the number of samples. In this case, the complete tree is not computed, thus the 'children' output is of limited use, and the 'parents' output should rather be used. This option is valid only when specifying a connectivity matrix." - }, - { - "name": "return_distance", - "type": "bool", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If True, return the distance between the clusters." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ward clustering based on a Feature matrix.\n\nRecursively merges the pair of clusters that minimally increases\nwithin-cluster variance.\n\nThe inertia matrix uses a Heapq-based representation.\n\nThis is the structured version, that takes into account some topological\nstructure between samples.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n feature matrix representing n_samples samples to be clustered\n\nconnectivity : sparse matrix, default=None\n connectivity matrix. Defines for each sample the neighboring samples\n following a given structure of the data. The matrix is assumed to\n be symmetric and only the upper triangular half is used.\n Default is None, i.e, the Ward algorithm is unstructured.\n\nn_clusters : int, default=None\n Stop early the construction of the tree at n_clusters. This is\n useful to decrease computation time if the number of clusters is\n not small compared to the number of samples. 
In this case, the\n complete tree is not computed, thus the 'children' output is of\n limited use, and the 'parents' output should rather be used.\n This option is valid only when specifying a connectivity matrix.\n\nreturn_distance : bool, default=None\n If True, return the distance between the clusters.\n\nReturns\n-------\nchildren : ndarray of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\nn_connected_components : int\n The number of connected components in the graph.\n\nn_leaves : int\n The number of leaves in the tree\n\nparents : ndarray of shape (n_nodes,) or None\n The parent of each node. Only returned when a connectivity matrix\n is specified, elsewhere 'None' is returned.\n\ndistances : ndarray of shape (n_nodes-1,)\n Only returned if return_distance is set to True (for compatibility).\n The distances between the centers of the nodes. `distances[i]`\n corresponds to a weighted euclidean distance between\n the nodes `children[i, 1]` and `children[i, 2]`. If the nodes refer to\n leaves of the tree, then `distances[i]` is their unweighted euclidean\n distance. Distances are updated in the following way\n (from scipy.hierarchy.linkage):\n\n The new entry :math:`d(u,v)` is computed as follows,\n\n .. math::\n\n d(u,v) = \\sqrt{\\frac{|v|+|s|}\n {T}d(v,s)^2\n + \\frac{|v|+|t|}\n {T}d(v,t)^2\n - \\frac{|v|}\n {T}d(s,t)^2}\n\n where :math:`u` is the newly joined cluster consisting of\n clusters :math:`s` and :math:`t`, :math:`v` is an unused\n cluster in the forest, :math:`T=|v|+|s|+|t|`, and\n :math:`|*|` is the cardinality of its argument. This is also\n known as the incremental algorithm." 
- }, - { - "name": "linkage_tree", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "feature matrix representing n_samples samples to be clustered" - }, - { - "name": "connectivity", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "connectivity matrix. Defines for each sample the neighboring samples following a given structure of the data. The matrix is assumed to be symmetric and only the upper triangular half is used. Default is None, i.e, the Ward algorithm is unstructured." - }, - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Stop early the construction of the tree at n_clusters. This is useful to decrease computation time if the number of clusters is not small compared to the number of samples. In this case, the complete tree is not computed, thus the 'children' output is of limited use, and the 'parents' output should rather be used. This option is valid only when specifying a connectivity matrix." - }, - { - "name": "linkage", - "type": "Literal[\"average\", \"complete\", \"single\"]", - "hasDefault": true, - "default": "\"complete\"", - "limitation": null, - "ignored": false, - "docstring": "Which linkage criteria to use. The linkage criterion determines which distance to use between sets of observation. - average uses the average of the distances of each observation of the two sets - complete or maximum linkage uses the maximum distances between all observations of the two sets. - single uses the minimum of the distances between all observations of the two sets." - }, - { - "name": "affinity", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "\"euclidean\"", - "limitation": null, - "ignored": false, - "docstring": "which metric to use. 
Can be \"euclidean\", \"manhattan\", or any distance know to paired distance (see metric.pairwise)" - }, - { - "name": "return_distance", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "whether or not to return the distances between the clusters." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Linkage agglomerative clustering based on a Feature matrix.\n\nThe inertia matrix uses a Heapq-based representation.\n\nThis is the structured version, that takes into account some topological\nstructure between samples.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n feature matrix representing n_samples samples to be clustered\n\nconnectivity : sparse matrix, default=None\n connectivity matrix. Defines for each sample the neighboring samples\n following a given structure of the data. The matrix is assumed to\n be symmetric and only the upper triangular half is used.\n Default is None, i.e, the Ward algorithm is unstructured.\n\nn_clusters : int, default=None\n Stop early the construction of the tree at n_clusters. This is\n useful to decrease computation time if the number of clusters is\n not small compared to the number of samples. In this case, the\n complete tree is not computed, thus the 'children' output is of\n limited use, and the 'parents' output should rather be used.\n This option is valid only when specifying a connectivity matrix.\n\nlinkage : {\"average\", \"complete\", \"single\"}, default=\"complete\"\n Which linkage criteria to use. 
The linkage criterion determines which\n distance to use between sets of observation.\n - average uses the average of the distances of each observation of\n the two sets\n - complete or maximum linkage uses the maximum distances between\n all observations of the two sets.\n - single uses the minimum of the distances between all observations\n of the two sets.\n\naffinity : str or callable, default=\"euclidean\".\n which metric to use. Can be \"euclidean\", \"manhattan\", or any\n distance know to paired distance (see metric.pairwise)\n\nreturn_distance : bool, default=False\n whether or not to return the distances between the clusters.\n\nReturns\n-------\nchildren : ndarray of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\nn_connected_components : int\n The number of connected components in the graph.\n\nn_leaves : int\n The number of leaves in the tree.\n\nparents : ndarray of shape (n_nodes, ) or None\n The parent of each node. Only returned when a connectivity matrix\n is specified, elsewhere 'None' is returned.\n\ndistances : ndarray of shape (n_nodes-1,)\n Returned when return_distance is set to True.\n\n distances[i] refers to the distance between children[i][0] and\n children[i][1] when they are merged.\n\nSee Also\n--------\nward_tree : Hierarchical clustering with ward linkage." 
- }, - { - "name": "_complete_linkage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_average_linkage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_single_linkage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_hc_cut", - "decorators": [], - "parameters": [ - { - "name": "n_clusters", - "type": "Union[NDArray, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of clusters to form." - }, - { - "name": "children", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The children of each non-leaf node. Values less than `n_samples` correspond to leaves of the tree which are the original samples. A node `i` greater than or equal to `n_samples` is a non-leaf node and has children `children_[i - n_samples]`. Alternatively at the i-th iteration, children[i][0] and children[i][1] are merged to form node `n_samples + i`" - }, - { - "name": "n_leaves", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of leaves of the tree." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Function cutting the ward tree for a given number of clusters.\n\nParameters\n----------\nn_clusters : int or ndarray\n The number of clusters to form.\n\nchildren : ndarray of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. 
Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\nn_leaves : int\n Number of leaves of the tree.\n\nReturns\n-------\nlabels : array [n_samples]\n cluster labels for each point" - } - ] - }, - { - "name": "sklearn.cluster._bicluster", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "import warnings", - "import numpy as np", - "from scipy.linalg import norm", - "from scipy.sparse import dia_matrix", - "from scipy.sparse import issparse", - "from scipy.sparse.linalg import eigsh", - "from scipy.sparse.linalg import svds", - "from None import KMeans", - "from None import MiniBatchKMeans", - "from base import BaseEstimator", - "from base import BiclusterMixin", - "from utils import check_random_state", - "from utils.extmath import make_nonnegative", - "from utils.extmath import randomized_svd", - "from utils.extmath import safe_sparse_dot", - "from utils.validation import assert_all_finite", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "BaseSpectral", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Creates a biclustering for X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\ny : 
Ignored" - }, - { - "name": "_svd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns first `n_components` left and right singular\nvectors u and v, discarding the first `n_discard`." - }, - { - "name": "_k_means", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for spectral biclustering." - }, - { - "name": "SpectralCoclustering", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The number of biclusters to find." - }, - { - "name": "svd_method", - "type": "Literal['randomized', 'arpack']", - "hasDefault": true, - "default": "'randomized'", - "limitation": null, - "ignored": false, - "docstring": "Selects the algorithm for finding singular vectors. May be 'randomized' or 'arpack'. If 'randomized', use :func:`sklearn.utils.extmath.randomized_svd`, which may be faster for large matrices. If 'arpack', use :func:`scipy.sparse.linalg.svds`, which is more accurate, but possibly slower in some cases." - }, - { - "name": "n_svd_vecs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of vectors to use in calculating the SVD. Corresponds to `ncv` when `svd_method=arpack` and `n_oversamples` when `svd_method` is 'randomized`." - }, - { - "name": "mini_batch", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use mini-batch k-means, which is faster but may get different results." 
- }, - { - "name": "init", - "type": null, - "hasDefault": true, - "default": "'k-means++'", - "limitation": null, - "ignored": false, - "docstring": "Method for initialization of k-means algorithm; defaults to 'k-means++'." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of random initializations that are tried with the k-means algorithm. If mini-batch k-means is used, the best initialization is chosen and the algorithm runs once. Otherwise, the algorithm is run for each initialization and the best solution chosen." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. deprecated:: 0.23 ``n_jobs`` was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25)." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for randomizing the singular value decomposition and the k-means initialization. Use an int to make the randomness deterministic. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Spectral Co-Clustering algorithm (Dhillon, 2001).\n\nClusters rows and columns of an array `X` to solve the relaxed\nnormalized cut of the bipartite graph created from `X` as follows:\nthe edge between row vertex `i` and column vertex `j` has weight\n`X[i, j]`.\n\nThe resulting bicluster structure is block-diagonal, since each\nrow and each column belongs to exactly one bicluster.\n\nSupports sparse matrices, as long as they are nonnegative.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int, default=3\n The number of biclusters to find.\n\nsvd_method : {'randomized', 'arpack'}, default='randomized'\n Selects the algorithm for finding singular vectors. May be\n 'randomized' or 'arpack'. If 'randomized', use\n :func:`sklearn.utils.extmath.randomized_svd`, which may be faster\n for large matrices. If 'arpack', use\n :func:`scipy.sparse.linalg.svds`, which is more accurate, but\n possibly slower in some cases.\n\nn_svd_vecs : int, default=None\n Number of vectors to use in calculating the SVD. Corresponds\n to `ncv` when `svd_method=arpack` and `n_oversamples` when\n `svd_method` is 'randomized`.\n\nmini_batch : bool, default=False\n Whether to use mini-batch k-means, which is faster but may get\n different results.\n\ninit : {'k-means++', 'random', or ndarray of shape (n_clusters, n_features), default='k-means++'\n Method for initialization of k-means algorithm; defaults to\n 'k-means++'.\n\nn_init : int, default=10\n Number of random initializations that are tried with the\n k-means algorithm.\n\n If mini-batch k-means is used, the best initialization is\n chosen and the algorithm runs once. 
Otherwise, the algorithm\n is run for each initialization and the best solution chosen.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\nrandom_state : int, RandomState instance, default=None\n Used for randomizing the singular value decomposition and the k-means\n initialization. Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\nAttributes\n----------\nrows_ : array-like of shape (n_row_clusters, n_rows)\n Results of the clustering. `rows[i, r]` is True if\n cluster `i` contains row `r`. Available only after calling ``fit``.\n\ncolumns_ : array-like of shape (n_column_clusters, n_columns)\n Results of the clustering, like `rows`.\n\nrow_labels_ : array-like of shape (n_rows,)\n The bicluster label of each row.\n\ncolumn_labels_ : array-like of shape (n_cols,)\n The bicluster label of each column.\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralCoclustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n... [4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralCoclustering(n_clusters=2, random_state=0).fit(X)\n>>> clustering.row_labels_ #doctest: +SKIP\narray([0, 1, 1, 0, 0, 0], dtype=int32)\n>>> clustering.column_labels_ #doctest: +SKIP\narray([0, 0], dtype=int32)\n>>> clustering\nSpectralCoclustering(n_clusters=2, random_state=0)\n\nReferences\n----------\n\n* Dhillon, Inderjit S, 2001. `Co-clustering documents and words using\n bipartite spectral graph partitioning\n `__." 
- }, - { - "name": "SpectralBiclustering", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_clusters", - "type": "Union[Tuple[], int]", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The number of row and column clusters in the checkerboard structure." - }, - { - "name": "method", - "type": "Literal['bistochastic', 'scale', 'log']", - "hasDefault": true, - "default": "'bistochastic'", - "limitation": null, - "ignored": false, - "docstring": "Method of normalizing and converting singular vectors into biclusters. May be one of 'scale', 'bistochastic', or 'log'. The authors recommend using 'log'. If the data is sparse, however, log normalization will not work, which is why the default is 'bistochastic'. .. warning:: if `method='log'`, the data must be sparse." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "6", - "limitation": null, - "ignored": false, - "docstring": "Number of singular vectors to check." - }, - { - "name": "n_best", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of best singular vectors to which to project the data for clustering." - }, - { - "name": "svd_method", - "type": "Literal['randomized', 'arpack']", - "hasDefault": true, - "default": "'randomized'", - "limitation": null, - "ignored": false, - "docstring": "Selects the algorithm for finding singular vectors. May be 'randomized' or 'arpack'. If 'randomized', uses :func:`~sklearn.utils.extmath.randomized_svd`, which may be faster for large matrices. If 'arpack', uses `scipy.sparse.linalg.svds`, which is more accurate, but possibly slower in some cases." 
- }, - { - "name": "n_svd_vecs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of vectors to use in calculating the SVD. Corresponds to `ncv` when `svd_method=arpack` and `n_oversamples` when `svd_method` is 'randomized`." - }, - { - "name": "mini_batch", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use mini-batch k-means, which is faster but may get different results." - }, - { - "name": "init", - "type": "Literal['k-means++', 'random']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Method for initialization of k-means algorithm; defaults to 'k-means++'." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of random initializations that are tried with the k-means algorithm. If mini-batch k-means is used, the best initialization is chosen and the algorithm runs once. Otherwise, the algorithm is run for each initialization and the best solution chosen." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. deprecated:: 0.23 ``n_jobs`` was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25)." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for randomizing the singular value decomposition and the k-means initialization. 
Use an int to make the randomness deterministic. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_best_piecewise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the ``n_best`` vectors that are best approximated by piecewise\nconstant vectors.\n\nThe piecewise vectors are found by k-means; the best is chosen\naccording to Euclidean distance." - }, - { - "name": "_project_and_cluster", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Project ``data`` to ``vectors`` and cluster the result." - } - ], - "docstring": "Spectral biclustering (Kluger, 2003).\n\nPartitions rows and columns under the assumption that the data has\nan underlying checkerboard structure. For instance, if there are\ntwo row partitions and three column partitions, each row will\nbelong to three biclusters, and each column will belong to two\nbiclusters. The outer product of the corresponding row and column\nlabel vectors gives this checkerboard structure.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int or tuple (n_row_clusters, n_column_clusters), default=3\n The number of row and column clusters in the checkerboard\n structure.\n\nmethod : {'bistochastic', 'scale', 'log'}, default='bistochastic'\n Method of normalizing and converting singular vectors into\n biclusters. May be one of 'scale', 'bistochastic', or 'log'.\n The authors recommend using 'log'. If the data is sparse,\n however, log normalization will not work, which is why the\n default is 'bistochastic'.\n\n .. 
warning::\n if `method='log'`, the data must be sparse.\n\nn_components : int, default=6\n Number of singular vectors to check.\n\nn_best : int, default=3\n Number of best singular vectors to which to project the data\n for clustering.\n\nsvd_method : {'randomized', 'arpack'}, default='randomized'\n Selects the algorithm for finding singular vectors. May be\n 'randomized' or 'arpack'. If 'randomized', uses\n :func:`~sklearn.utils.extmath.randomized_svd`, which may be faster\n for large matrices. If 'arpack', uses\n `scipy.sparse.linalg.svds`, which is more accurate, but\n possibly slower in some cases.\n\nn_svd_vecs : int, default=None\n Number of vectors to use in calculating the SVD. Corresponds\n to `ncv` when `svd_method=arpack` and `n_oversamples` when\n `svd_method` is 'randomized`.\n\nmini_batch : bool, default=False\n Whether to use mini-batch k-means, which is faster but may get\n different results.\n\ninit : {'k-means++', 'random'} or ndarray of (n_clusters, n_features), default='k-means++'\n Method for initialization of k-means algorithm; defaults to\n 'k-means++'.\n\nn_init : int, default=10\n Number of random initializations that are tried with the\n k-means algorithm.\n\n If mini-batch k-means is used, the best initialization is\n chosen and the algorithm runs once. Otherwise, the algorithm\n is run for each initialization and the best solution chosen.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\nrandom_state : int, RandomState instance, default=None\n Used for randomizing the singular value decomposition and the k-means\n initialization. 
Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\nAttributes\n----------\nrows_ : array-like of shape (n_row_clusters, n_rows)\n Results of the clustering. `rows[i, r]` is True if\n cluster `i` contains row `r`. Available only after calling ``fit``.\n\ncolumns_ : array-like of shape (n_column_clusters, n_columns)\n Results of the clustering, like `rows`.\n\nrow_labels_ : array-like of shape (n_rows,)\n Row partition labels.\n\ncolumn_labels_ : array-like of shape (n_cols,)\n Column partition labels.\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralBiclustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n... [4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralBiclustering(n_clusters=2, random_state=0).fit(X)\n>>> clustering.row_labels_\narray([1, 1, 1, 0, 0, 0], dtype=int32)\n>>> clustering.column_labels_\narray([0, 1], dtype=int32)\n>>> clustering\nSpectralBiclustering(n_clusters=2, random_state=0)\n\nReferences\n----------\n\n* Kluger, Yuval, et. al., 2003. `Spectral biclustering of microarray\n data: coclustering genes and conditions\n `__." - } - ], - "functions": [ - { - "name": "_scale_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Normalize ``X`` by scaling rows and columns independently.\n\nReturns the normalized matrix and the row and column scaling\nfactors." - }, - { - "name": "_bistochastic_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Normalize rows and columns of ``X`` simultaneously so that all\nrows sum to one constant and all columns sum to a different\nconstant." - }, - { - "name": "_log_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Normalize ``X`` according to Kluger's log-interactions scheme." 
- } - ] - }, - { - "name": "sklearn.cluster._birch", - "imports": [ - "import warnings", - "import numbers", - "import numpy as np", - "from scipy import sparse", - "from math import sqrt", - "from metrics import pairwise_distances_argmin", - "from metrics.pairwise import euclidean_distances", - "from base import TransformerMixin", - "from base import ClusterMixin", - "from base import BaseEstimator", - "from utils.extmath import row_norms", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from exceptions import ConvergenceWarning", - "from None import AgglomerativeClustering", - "from _config import config_context" - ], - "classes": [ - { - "name": "_CFNode", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "threshold", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold needed for a new subcluster to enter a CFSubcluster." - }, - { - "name": "branching_factor", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of CF subclusters in each node." - }, - { - "name": "is_leaf", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "We need to know if the CFNode is a leaf or not, in order to retrieve the final subclusters." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "append_subcluster", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update_split_subclusters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Remove a subcluster from a node and update it with the\nsplit subclusters." - }, - { - "name": "insert_cf_subcluster", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Insert a new subcluster into the node." - } - ], - "docstring": "Each node in a CFTree is called a CFNode.\n\nThe CFNode can have a maximum of branching_factor\nnumber of CFSubclusters.\n\nParameters\n----------\nthreshold : float\n Threshold needed for a new subcluster to enter a CFSubcluster.\n\nbranching_factor : int\n Maximum number of CF subclusters in each node.\n\nis_leaf : bool\n We need to know if the CFNode is a leaf or not, in order to\n retrieve the final subclusters.\n\nn_features : int\n The number of features.\n\nAttributes\n----------\nsubclusters_ : list\n List of subclusters for a particular CFNode.\n\nprev_leaf_ : _CFNode\n Useful only if is_leaf is True.\n\nnext_leaf_ : _CFNode\n next_leaf. Useful only if is_leaf is True.\n the final subclusters.\n\ninit_centroids_ : ndarray of shape (branching_factor + 1, n_features)\n Manipulate ``init_centroids_`` throughout rather than centroids_ since\n the centroids are just a view of the ``init_centroids_`` .\n\ninit_sq_norm_ : ndarray of shape (branching_factor + 1,)\n manipulate init_sq_norm_ throughout. similar to ``init_centroids_``.\n\ncentroids_ : ndarray of shape (branching_factor + 1, n_features)\n View of ``init_centroids_``.\n\nsquared_norm_ : ndarray of shape (branching_factor + 1,)\n View of ``init_sq_norm_``." 
- }, - { - "name": "_CFSubcluster", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "linear_sum", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample. This is kept optional to allow initialization of empty subclusters." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "merge_subcluster", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if a cluster is worthy enough to be merged. If\nyes then merge." - }, - { - "name": "radius", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return radius of the subcluster" - } - ], - "docstring": "Each subcluster in a CFNode is called a CFSubcluster.\n\nA CFSubcluster can have a CFNode has its child.\n\nParameters\n----------\nlinear_sum : ndarray of shape (n_features,), default=None\n Sample. This is kept optional to allow initialization of empty\n subclusters.\n\nAttributes\n----------\nn_samples_ : int\n Number of samples that belong to each subcluster.\n\nlinear_sum_ : ndarray\n Linear sum of all the samples in a subcluster. Prevents holding\n all sample data in memory.\n\nsquared_sum_ : float\n Sum of the squared l2 norms of all samples belonging to a subcluster.\n\ncentroid_ : ndarray of shape (branching_factor + 1, n_features)\n Centroid of the subcluster. Prevent recomputing of centroids when\n ``CFNode.centroids_`` is called.\n\nchild_ : _CFNode\n Child Node of the subcluster. Once a given _CFNode is set as the child\n of the _CFNode, it is set to ``self.child_``.\n\nsq_norm_ : ndarray of shape (branching_factor + 1,)\n Squared norm of the subcluster. 
Used to prevent recomputing when\n pairwise minimum distances are computed." - }, - { - "name": "Birch", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "threshold", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The radius of the subcluster obtained by merging a new sample and the closest subcluster should be lesser than the threshold. Otherwise a new subcluster is started. Setting this value to be very low promotes splitting and vice-versa." - }, - { - "name": "branching_factor", - "type": "int", - "hasDefault": true, - "default": "50", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of CF subclusters in each node. If a new samples enters such that the number of subclusters exceed the branching_factor then that node is split into two nodes with the subclusters redistributed in each. The parent subcluster of that node is removed and two new subclusters are added as parents of the 2 split nodes." - }, - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of clusters after the final clustering step, which treats the subclusters from the leaves as new samples. - `None` : the final clustering step is not performed and the subclusters are returned as they are. - :mod:`sklearn.cluster` Estimator : If a model is provided, the model is fit treating the subclusters as new samples and the initial data is mapped to the label of the closest subcluster. - `int` : the model fit is :class:`AgglomerativeClustering` with `n_clusters` set to be equal to the int." - }, - { - "name": "compute_labels", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to compute labels for each fit." 
- }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to make a copy of the given data. If set to False, the initial data will be overwritten." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a CF Tree for the input data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself\n Fitted estimator." - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_leaves", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Retrieve the leaves of the CF Node.\n\nReturns\n-------\nleaves : list of shape (n_leaves,)\n List of the leaf nodes." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. If X is not provided, only the global clustering step is done." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Online learning. Prevents rebuilding of CFTree from scratch.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), default=None\n Input data. If X is not provided, only the global clustering\n step is done.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself\n Fitted estimator." - }, - { - "name": "_check_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict data using the ``centroids_`` of subclusters.\n\nAvoid computation of the row norms of X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nlabels : ndarray of shape(n_samples,)\n Labelled data." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X into subcluster centroids dimension.\n\nEach dimension represents the distance from the sample point to each\ncluster centroid.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nX_trans : {array-like, sparse matrix} of shape (n_samples, n_clusters)\n Transformed data." 
- }, - { - "name": "_global_clustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Global clustering for the subclusters obtained after fitting" - } - ], - "docstring": "Implements the Birch clustering algorithm.\n\nIt is a memory-efficient, online-learning algorithm provided as an\nalternative to :class:`MiniBatchKMeans`. It constructs a tree\ndata structure with the cluster centroids being read off the leaf.\nThese can be either the final cluster centroids or can be provided as input\nto another clustering algorithm such as :class:`AgglomerativeClustering`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16\n\nParameters\n----------\nthreshold : float, default=0.5\n The radius of the subcluster obtained by merging a new sample and the\n closest subcluster should be lesser than the threshold. Otherwise a new\n subcluster is started. Setting this value to be very low promotes\n splitting and vice-versa.\n\nbranching_factor : int, default=50\n Maximum number of CF subclusters in each node. If a new samples enters\n such that the number of subclusters exceed the branching_factor then\n that node is split into two nodes with the subclusters redistributed\n in each. 
The parent subcluster of that node is removed and two new\n subclusters are added as parents of the 2 split nodes.\n\nn_clusters : int, instance of sklearn.cluster model, default=3\n Number of clusters after the final clustering step, which treats the\n subclusters from the leaves as new samples.\n\n - `None` : the final clustering step is not performed and the\n subclusters are returned as they are.\n\n - :mod:`sklearn.cluster` Estimator : If a model is provided, the model\n is fit treating the subclusters as new samples and the initial data\n is mapped to the label of the closest subcluster.\n\n - `int` : the model fit is :class:`AgglomerativeClustering` with\n `n_clusters` set to be equal to the int.\n\ncompute_labels : bool, default=True\n Whether or not to compute labels for each fit.\n\ncopy : bool, default=True\n Whether or not to make a copy of the given data. If set to False,\n the initial data will be overwritten.\n\nAttributes\n----------\nroot_ : _CFNode\n Root of the CFTree.\n\ndummy_leaf_ : _CFNode\n Start pointer to all the leaves.\n\nsubcluster_centers_ : ndarray\n Centroids of all subclusters read directly from the leaves.\n\nsubcluster_labels_ : ndarray\n Labels assigned to the centroids of the subclusters after\n they are clustered globally.\n\nlabels_ : ndarray of shape (n_samples,)\n Array of labels assigned to the input data.\n if partial_fit is used instead of fit, they are assigned to the\n last batch of data.\n\nSee Also\n--------\nMiniBatchKMeans : Alternative implementation that does incremental updates\n of the centers' positions using mini-batches.\n\nNotes\n-----\nThe tree data structure consists of nodes with each node consisting of\na number of subclusters. The maximum number of subclusters in a node\nis determined by the branching factor. 
Each subcluster maintains a\nlinear sum, squared sum and the number of samples in that subcluster.\nIn addition, each subcluster can also have a node as its child, if the\nsubcluster is not a member of a leaf node.\n\nFor a new point entering the root, it is merged with the subcluster closest\nto it and the linear sum, squared sum and the number of samples of that\nsubcluster are updated. This is done recursively till the properties of\nthe leaf node are updated.\n\nReferences\n----------\n* Tian Zhang, Raghu Ramakrishnan, Maron Livny\n BIRCH: An efficient data clustering method for large databases.\n https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf\n\n* Roberto Perdisci\n JBirch - Java implementation of BIRCH clustering algorithm\n https://code.google.com/archive/p/jbirch\n\nExamples\n--------\n>>> from sklearn.cluster import Birch\n>>> X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]\n>>> brc = Birch(n_clusters=None)\n>>> brc.fit(X)\nBirch(n_clusters=None)\n>>> brc.predict(X)\narray([0, 0, 0, 1, 1, 1])" - } - ], - "functions": [ - { - "name": "_iterate_sparse_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "This little hack returns a densified row when iterating over a sparse\nmatrix, instead of constructing a sparse matrix for every row that is\nexpensive." - }, - { - "name": "_split_node", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The node has to be split if there is no place for a new subcluster\nin the node.\n1. Two empty nodes and two empty subclusters are initialized.\n2. The pair of distant subclusters are found.\n3. The properties of the empty subclusters and nodes are updated\n according to the nearest distance between the subclusters to the\n pair of distant subclusters.\n4. The two nodes are set as children to the two subclusters." 
- } - ] - }, - { - "name": "sklearn.cluster._dbscan", - "imports": [ - "import numpy as np", - "import warnings", - "from scipy import sparse", - "from base import BaseEstimator", - "from base import ClusterMixin", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from neighbors import NearestNeighbors", - "from _dbscan_inner import dbscan_inner" - ], - "classes": [ - { - "name": "DBSCAN", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The maximum distance between two samples for one to be considered as in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function." - }, - { - "name": "min_samples", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "The number of samples (or total weight) in a neighborhood for a point to be considered as a core point. This includes the point itself." - }, - { - "name": "metric", - "type": "str", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string or callable, it must be one of the options allowed by :func:`sklearn.metrics.pairwise_distances` for its metric parameter. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square. X may be a :term:`Glossary `, in which case only \"nonzero\" elements may be considered neighbors for DBSCAN. .. versionadded:: 0.17 metric *precomputed* to accept precomputed sparse matrix." 
- }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function. .. versionadded:: 0.19" - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The algorithm to be used by the NearestNeighbors module to compute pointwise distances and find nearest neighbors. See NearestNeighbors module documentation for details." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or cKDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "p", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The power of the Minkowski metric to be used to calculate distance between points. If None, then ``p=2`` (equivalent to the Euclidean distance)." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster, or distances between instances if ``metric='precomputed'``. 
If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weight of each sample, such that a sample with a weight of at least ``min_samples`` is by itself a core sample; a sample with a negative weight may inhibit its eps-neighbor from being core. Note that weights are absolute, and default to 1." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Perform DBSCAN clustering from features, or distance matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``metric='precomputed'``. If a sparse matrix is provided, it will\n be converted into a sparse ``csr_matrix``.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weight of each sample, such that a sample with a weight of at least\n ``min_samples`` is by itself a core sample; a sample with a\n negative weight may inhibit its eps-neighbor from being core.\n Note that weights are absolute, and default to 1.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself" - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster, or distances between instances if ``metric='precomputed'``. If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weight of each sample, such that a sample with a weight of at least ``min_samples`` is by itself a core sample; a sample with a negative weight may inhibit its eps-neighbor from being core. Note that weights are absolute, and default to 1." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform DBSCAN clustering from features or distance matrix,\nand return cluster labels.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``metric='precomputed'``. If a sparse matrix is provided, it will\n be converted into a sparse ``csr_matrix``.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weight of each sample, such that a sample with a weight of at least\n ``min_samples`` is by itself a core sample; a sample with a\n negative weight may inhibit its eps-neighbor from being core.\n Note that weights are absolute, and default to 1.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels. Noisy samples are given the label -1." 
- } - ], - "docstring": "Perform DBSCAN clustering from vector array or distance matrix.\n\nDBSCAN - Density-Based Spatial Clustering of Applications with Noise.\nFinds core samples of high density and expands clusters from them.\nGood for data which contains clusters of similar density.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\neps : float, default=0.5\n The maximum distance between two samples for one to be considered\n as in the neighborhood of the other. This is not a maximum bound\n on the distances of points within a cluster. This is the most\n important DBSCAN parameter to choose appropriately for your data set\n and distance function.\n\nmin_samples : int, default=5\n The number of samples (or total weight) in a neighborhood for a point\n to be considered as a core point. This includes the point itself.\n\nmetric : string, or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n its metric parameter.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square. X may be a :term:`Glossary `, in which\n case only \"nonzero\" elements may be considered neighbors for DBSCAN.\n\n .. versionadded:: 0.17\n metric *precomputed* to accept precomputed sparse matrix.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n .. versionadded:: 0.19\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n The algorithm to be used by the NearestNeighbors module\n to compute pointwise distances and find nearest neighbors.\n See NearestNeighbors module documentation for details.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or cKDTree. This can affect the speed\n of the construction and query, as well as the memory required\n to store the tree. 
The optimal value depends\n on the nature of the problem.\n\np : float, default=None\n The power of the Minkowski metric to be used to calculate distance\n between points. If None, then ``p=2`` (equivalent to the Euclidean\n distance).\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\ncore_sample_indices_ : ndarray of shape (n_core_samples,)\n Indices of core samples.\n\ncomponents_ : ndarray of shape (n_core_samples, n_features)\n Copy of each core sample found by training.\n\nlabels_ : ndarray of shape (n_samples)\n Cluster labels for each point in the dataset given to fit().\n Noisy samples are given the label -1.\n\nExamples\n--------\n>>> from sklearn.cluster import DBSCAN\n>>> import numpy as np\n>>> X = np.array([[1, 2], [2, 2], [2, 3],\n... [8, 7], [8, 8], [25, 80]])\n>>> clustering = DBSCAN(eps=3, min_samples=2).fit(X)\n>>> clustering.labels_\narray([ 0, 0, 0, 1, 1, -1])\n>>> clustering\nDBSCAN(eps=3, min_samples=2)\n\nSee Also\n--------\nOPTICS : A similar clustering at multiple values of eps. Our implementation\n is optimized for memory usage.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_dbscan.py\n`.\n\nThis implementation bulk-computes all neighborhood queries, which increases\nthe memory complexity to O(n.d) where d is the average number of neighbors,\nwhile original DBSCAN had memory complexity O(n). 
It may attract a higher\nmemory complexity when querying these nearest neighborhoods, depending\non the ``algorithm``.\n\nOne way to avoid the query complexity is to pre-compute sparse\nneighborhoods in chunks using\n:func:`NearestNeighbors.radius_neighbors_graph\n` with\n``mode='distance'``, then using ``metric='precomputed'`` here.\n\nAnother way to reduce memory and computation time is to remove\n(near-)duplicate points and use ``sample_weight`` instead.\n\n:class:`cluster.OPTICS` provides a similar clustering with lower memory\nusage.\n\nReferences\n----------\nEster, M., H. P. Kriegel, J. Sander, and X. Xu, \"A Density-Based\nAlgorithm for Discovering Clusters in Large Spatial Databases with Noise\".\nIn: Proceedings of the 2nd International Conference on Knowledge Discovery\nand Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\nSchubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).\nDBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\nACM Transactions on Database Systems (TODS), 42(3), 19." - } - ], - "functions": [ - { - "name": "dbscan", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A feature array, or array of distances between samples if ``metric='precomputed'``." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The maximum distance between two samples for one to be considered as in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function." 
- }, - { - "name": "min_samples", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "The number of samples (or total weight) in a neighborhood for a point to be considered as a core point. This includes the point itself." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string or callable, it must be one of the options allowed by :func:`sklearn.metrics.pairwise_distances` for its metric parameter. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square during fit. X may be a :term:`sparse graph `, in which case only \"nonzero\" elements may be considered neighbors." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function. .. versionadded:: 0.19" - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The algorithm to be used by the NearestNeighbors module to compute pointwise distances and find nearest neighbors. See NearestNeighbors module documentation for details." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or cKDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." 
- }, - { - "name": "p", - "type": "float", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The power of the Minkowski metric to be used to calculate distance between points." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weight of each sample, such that a sample with a weight of at least ``min_samples`` is by itself a core sample; a sample with negative weight may inhibit its eps-neighbor from being core. Note that weights are absolute, and default to 1." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. If precomputed distance are used, parallel execution is not available and thus n_jobs will have no effect." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform DBSCAN clustering from vector array or distance matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse (CSR) matrix} of shape (n_samples, n_features) or (n_samples, n_samples)\n A feature array, or array of distances between samples if\n ``metric='precomputed'``.\n\neps : float, default=0.5\n The maximum distance between two samples for one to be considered\n as in the neighborhood of the other. This is not a maximum bound\n on the distances of points within a cluster. This is the most\n important DBSCAN parameter to choose appropriately for your data set\n and distance function.\n\nmin_samples : int, default=5\n The number of samples (or total weight) in a neighborhood for a point\n to be considered as a core point. 
This includes the point itself.\n\nmetric : str or callable, default='minkowski'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n its metric parameter.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit.\n X may be a :term:`sparse graph `,\n in which case only \"nonzero\" elements may be considered neighbors.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n .. versionadded:: 0.19\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n The algorithm to be used by the NearestNeighbors module\n to compute pointwise distances and find nearest neighbors.\n See NearestNeighbors module documentation for details.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or cKDTree. This can affect the speed\n of the construction and query, as well as the memory required\n to store the tree. The optimal value depends\n on the nature of the problem.\n\np : float, default=2\n The power of the Minkowski metric to be used to calculate distance\n between points.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weight of each sample, such that a sample with a weight of at least\n ``min_samples`` is by itself a core sample; a sample with negative\n weight may inhibit its eps-neighbor from being core.\n Note that weights are absolute, and default to 1.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search. ``None`` means\n 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means\n using all processors. 
See :term:`Glossary ` for more details.\n If precomputed distance are used, parallel execution is not available\n and thus n_jobs will have no effect.\n\nReturns\n-------\ncore_samples : ndarray of shape (n_core_samples,)\n Indices of core samples.\n\nlabels : ndarray of shape (n_samples,)\n Cluster labels for each point. Noisy samples are given the label -1.\n\nSee Also\n--------\nDBSCAN : An estimator interface for this clustering algorithm.\nOPTICS : A similar estimator interface clustering at multiple values of\n eps. Our implementation is optimized for memory usage.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_dbscan.py\n`.\n\nThis implementation bulk-computes all neighborhood queries, which increases\nthe memory complexity to O(n.d) where d is the average number of neighbors,\nwhile original DBSCAN had memory complexity O(n). It may attract a higher\nmemory complexity when querying these nearest neighborhoods, depending\non the ``algorithm``.\n\nOne way to avoid the query complexity is to pre-compute sparse\nneighborhoods in chunks using\n:func:`NearestNeighbors.radius_neighbors_graph\n` with\n``mode='distance'``, then using ``metric='precomputed'`` here.\n\nAnother way to reduce memory and computation time is to remove\n(near-)duplicate points and use ``sample_weight`` instead.\n\n:func:`cluster.optics ` provides a similar\nclustering with lower memory usage.\n\nReferences\n----------\nEster, M., H. P. Kriegel, J. Sander, and X. Xu, \"A Density-Based\nAlgorithm for Discovering Clusters in Large Spatial Databases with Noise\".\nIn: Proceedings of the 2nd International Conference on Knowledge Discovery\nand Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\nSchubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).\nDBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\nACM Transactions on Database Systems (TODS), 42(3), 19." 
- } - ] - }, - { - "name": "sklearn.cluster._feature_agglomeration", - "imports": [ - "import numpy as np", - "from base import TransformerMixin", - "from utils.validation import check_is_fitted", - "from scipy.sparse import issparse" - ], - "classes": [ - { - "name": "AgglomerationTransform", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A M by N array of M observations in N dimensions or a length M array of M one-dimensional observations." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform a new matrix using the built clustering\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or (n_samples,)\n A M by N array of M observations in N dimensions or a length\n M array of M one-dimensional observations.\n\nReturns\n-------\nY : ndarray of shape (n_samples, n_clusters) or (n_clusters,)\n The pooled values for each feature cluster." 
- }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "Xred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The values to be assigned to each cluster of samples" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inverse the transformation.\nReturn a vector of size nb_features with the values of Xred assigned\nto each group of features\n\nParameters\n----------\nXred : array-like of shape (n_samples, n_clusters) or (n_clusters,)\n The values to be assigned to each cluster of samples\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features) or (n_features,)\n A vector of size n_samples with the values of Xred assigned to\n each of the cluster of samples." - } - ], - "docstring": "A class for feature agglomeration via the transform interface" - } - ], - "functions": [] - }, - { - "name": "sklearn.cluster._kmeans", - "imports": [ - "import warnings", - "import numpy as np", - "import scipy.sparse as sp", - "from threadpoolctl import threadpool_limits", - "from threadpoolctl import threadpool_info", - "from base import BaseEstimator", - "from base import ClusterMixin", - "from base import TransformerMixin", - "from metrics.pairwise import euclidean_distances", - "from utils.extmath import row_norms", - "from utils.extmath import stable_cumsum", - "from utils.sparsefuncs_fast import assign_rows_csr", - "from utils.sparsefuncs import mean_variance_axis", - "from utils.validation import _deprecate_positional_args", - "from utils import check_array", - "from utils import gen_batches", - "from utils import check_random_state", - "from utils import deprecated", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils._openmp_helpers import _openmp_effective_n_threads", - "from exceptions import ConvergenceWarning", - "from _k_means_fast import CHUNK_SIZE", - "from 
_k_means_fast import _inertia_dense", - "from _k_means_fast import _inertia_sparse", - "from _k_means_fast import _mini_batch_update_csr", - "from _k_means_lloyd import lloyd_iter_chunked_dense", - "from _k_means_lloyd import lloyd_iter_chunked_sparse", - "from _k_means_elkan import init_bounds_dense", - "from _k_means_elkan import init_bounds_sparse", - "from _k_means_elkan import elkan_iter_chunked_dense", - "from _k_means_elkan import elkan_iter_chunked_sparse" - ], - "classes": [ - { - "name": "KMeans", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "8", - "limitation": null, - "ignored": false, - "docstring": "The number of clusters to form as well as the number of centroids to generate." - }, - { - "name": "init", - "type": "Literal['k-means++', 'random']", - "hasDefault": true, - "default": "'k-means++'", - "limitation": null, - "ignored": false, - "docstring": "Method for initialization: 'k-means++' : selects initial cluster centers for k-mean clustering in a smart way to speed up convergence. See section Notes in k_init for more details. 'random': choose `n_clusters` observations (rows) at random from data for the initial centroids. If an array is passed, it should be of shape (n_clusters, n_features) and gives the initial centers. If a callable is passed, it should take arguments X, n_clusters and a random state and return an initialization." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations of the k-means algorithm for a single run." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance with regards to Frobenius norm of the difference in the cluster centers of two consecutive iterations to declare convergence." - }, - { - "name": "precompute_distances", - "type": "Literal['auto', True, False]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Precompute distances (faster but takes more memory). 'auto' : do not precompute distances if n_samples * n_clusters > 12 million. This corresponds to about 100MB overhead per job using double precision. True : always precompute distances. False : never precompute distances. .. deprecated:: 0.23 'precompute_distances' was deprecated in version 0.22 and will be removed in 1.0 (renaming of 0.25). It has no effect." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity mode." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for centroid initialization. Use an int to make the randomness deterministic. See :term:`Glossary `." - }, - { - "name": "copy_x", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "When pre-computing distances it is more numerically accurate to center the data first. If copy_x is True (default), then the original data is not modified. 
If False, the original data is modified, and put back before the function returns, but small numerical differences may be introduced by subtracting and then adding the data mean. Note that if the original data is not C-contiguous, a copy will be made even if copy_x is False. If the original data is sparse, but not in CSR format, a copy will be made even if copy_x is False." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of OpenMP threads to use for the computation. Parallelism is sample-wise on the main cython loop which assigns each sample to its closest center. ``None`` or ``-1`` means using all processors. .. deprecated:: 0.23 ``n_jobs`` was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25)." - }, - { - "name": "algorithm", - "type": "Literal[\"auto\", \"full\", \"elkan\"]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "K-means algorithm to use. The classical EM-style algorithm is \"full\". The \"elkan\" variation is more efficient on data with well-defined clusters, by using the triangle inequality. However it's more memory intensive due to the allocation of an extra array of shape (n_samples, n_clusters). For now \"auto\" (kept for backward compatibiliy) chooses \"elkan\" but it might change in the future for a better heuristic. .. versionchanged:: 0.18 Added Elkan algorithm" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_center_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if centers is compatible with X and n_clusters." 
- }, - { - "name": "_check_test_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_mkl_vcomp", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Warns when vcomp and mkl are both present" - }, - { - "name": "_init_centroids", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - }, - { - "name": "x_squared_norms", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Squared euclidean norm of each data point. Pass it if you have it at hands already to avoid it being recomputed here." - }, - { - "name": "init", - "type": "Literal['k-means++', 'random']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Method for initialization." - }, - { - "name": "random_state", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for centroid initialization. See :term:`Glossary `." - }, - { - "name": "init_size", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to randomly sample for speeding up the initialization (sometimes at the expense of accuracy)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the initial centroids.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nx_squared_norms : ndarray of shape (n_samples,)\n Squared euclidean norm of each data point. 
Pass it if you have it\n at hands already to avoid it being recomputed here.\n\ninit : {'k-means++', 'random'}, callable or ndarray of shape (n_clusters, n_features)\n Method for initialization.\n\nrandom_state : RandomState instance\n Determines random number generation for centroid initialization.\n See :term:`Glossary `.\n\ninit_size : int, default=None\n Number of samples to randomly sample for speeding up the\n initialization (sometimes at the expense of accuracy).\n\nReturns\n-------\ncenters : ndarray of shape (n_clusters, n_features)" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster. It must be noted that the data will be converted to C ordering, which will cause a memory copy if the given data is not C-contiguous. If a sparse matrix is passed, a copy will be made if it's not in CSR format." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute k-means clustering.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training instances to cluster. 
It must be noted that the data\n will be converted to C ordering, which will cause a memory\n copy if the given data is not C-contiguous.\n If a sparse matrix is passed, a copy will be made if it's not in\n CSR format.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\n .. versionadded:: 0.20\n\nReturns\n-------\nself\n Fitted estimator." - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data to transform." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute cluster centers and predict cluster index for each sample.\n\nConvenience method; equivalent to calling fit(X) followed by\npredict(X).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to." 
- }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data to transform." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute clustering and transform X to cluster-distance space.\n\nEquivalent to fit(X).transform(X), but more efficiently implemented.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_clusters)\n X transformed in the new space." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data to transform." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X to a cluster-distance space.\n\nIn the new space, each dimension is the distance to the cluster\ncenters. 
Note that even if X is sparse, the array returned by\n`transform` will typically be dense.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_clusters)\n X transformed in the new space." - }, - { - "name": "_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Guts of transform method; no input validation." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data to predict." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the closest cluster each sample in X belongs to.\n\nIn the vector quantization literature, `cluster_centers_` is called\nthe code book and each value returned by `predict` is the index of\nthe closest code in the code book.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to." - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Opposite of the value of X on the K-means objective.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\nReturns\n-------\nscore : float\n Opposite of the value of X on the K-means objective." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "K-Means clustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nn_clusters : int, default=8\n The number of clusters to form as well as the number of\n centroids to generate.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features), default='k-means++'\n Method for initialization:\n\n 'k-means++' : selects initial cluster centers for k-mean\n clustering in a smart way to speed up convergence. 
See section\n Notes in k_init for more details.\n\n 'random': choose `n_clusters` observations (rows) at random from data\n for the initial centroids.\n\n If an array is passed, it should be of shape (n_clusters, n_features)\n and gives the initial centers.\n\n If a callable is passed, it should take arguments X, n_clusters and a\n random state and return an initialization.\n\nn_init : int, default=10\n Number of time the k-means algorithm will be run with different\n centroid seeds. The final results will be the best output of\n n_init consecutive runs in terms of inertia.\n\nmax_iter : int, default=300\n Maximum number of iterations of the k-means algorithm for a\n single run.\n\ntol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n\nprecompute_distances : {'auto', True, False}, default='auto'\n Precompute distances (faster but takes more memory).\n\n 'auto' : do not precompute distances if n_samples * n_clusters > 12\n million. This corresponds to about 100MB overhead per job using\n double precision.\n\n True : always precompute distances.\n\n False : never precompute distances.\n\n .. deprecated:: 0.23\n 'precompute_distances' was deprecated in version 0.22 and will be\n removed in 1.0 (renaming of 0.25). It has no effect.\n\nverbose : int, default=0\n Verbosity mode.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization. Use\n an int to make the randomness deterministic.\n See :term:`Glossary `.\n\ncopy_x : bool, default=True\n When pre-computing distances it is more numerically accurate to center\n the data first. If copy_x is True (default), then the original data is\n not modified. 
If False, the original data is modified, and put back\n before the function returns, but small numerical differences may be\n introduced by subtracting and then adding the data mean. Note that if\n the original data is not C-contiguous, a copy will be made even if\n copy_x is False. If the original data is sparse, but not in CSR format,\n a copy will be made even if copy_x is False.\n\nn_jobs : int, default=None\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\n ``None`` or ``-1`` means using all processors.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\nalgorithm : {\"auto\", \"full\", \"elkan\"}, default=\"auto\"\n K-means algorithm to use. The classical EM-style algorithm is \"full\".\n The \"elkan\" variation is more efficient on data with well-defined\n clusters, by using the triangle inequality. However it's more memory\n intensive due to the allocation of an extra array of shape\n (n_samples, n_clusters).\n\n For now \"auto\" (kept for backward compatibiliy) chooses \"elkan\" but it\n might change in the future for a better heuristic.\n\n .. versionchanged:: 0.18\n Added Elkan algorithm\n\nAttributes\n----------\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n Coordinates of cluster centers. 
If the algorithm stops before fully\n converging (see ``tol`` and ``max_iter``), these will not be\n consistent with ``labels_``.\n\nlabels_ : ndarray of shape (n_samples,)\n Labels of each point\n\ninertia_ : float\n Sum of squared distances of samples to their closest cluster center.\n\nn_iter_ : int\n Number of iterations run.\n\nSee Also\n--------\nMiniBatchKMeans : Alternative online implementation that does incremental\n updates of the centers positions using mini-batches.\n For large scale learning (say n_samples > 10k) MiniBatchKMeans is\n probably much faster than the default batch implementation.\n\nNotes\n-----\nThe k-means problem is solved using either Lloyd's or Elkan's algorithm.\n\nThe average complexity is given by O(k n T), were n is the number of\nsamples and T is the number of iteration.\n\nThe worst case complexity is given by O(n^(k+2/p)) with\nn = n_samples, p = n_features. (D. Arthur and S. Vassilvitskii,\n'How slow is the k-means method?' SoCG2006)\n\nIn practice, the k-means algorithm is very fast (one of the fastest\nclustering algorithms available), but it falls in local minima. That's why\nit can be useful to restart it several times.\n\nIf the algorithm stops before fully converging (because of ``tol`` or\n``max_iter``), ``labels_`` and ``cluster_centers_`` will not be consistent,\ni.e. the ``cluster_centers_`` will not be the means of the points in each\ncluster. Also, the estimator will reassign ``labels_`` after the last\niteration to make ``labels_`` consistent with ``predict`` on the training\nset.\n\nExamples\n--------\n\n>>> from sklearn.cluster import KMeans\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... 
[10, 2], [10, 4], [10, 0]])\n>>> kmeans = KMeans(n_clusters=2, random_state=0).fit(X)\n>>> kmeans.labels_\narray([1, 1, 1, 0, 0, 0], dtype=int32)\n>>> kmeans.predict([[0, 0], [12, 3]])\narray([1, 0], dtype=int32)\n>>> kmeans.cluster_centers_\narray([[10., 2.],\n [ 1., 2.]])" - }, - { - "name": "MiniBatchKMeans", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "8", - "limitation": null, - "ignored": false, - "docstring": "The number of clusters to form as well as the number of centroids to generate." - }, - { - "name": "init", - "type": "Literal['k-means++', 'random']", - "hasDefault": true, - "default": "'k-means++'", - "limitation": null, - "ignored": false, - "docstring": "Method for initialization: 'k-means++' : selects initial cluster centers for k-mean clustering in a smart way to speed up convergence. See section Notes in k_init for more details. 'random': choose `n_clusters` observations (rows) at random from data for the initial centroids. If an array is passed, it should be of shape (n_clusters, n_features) and gives the initial centers. If a callable is passed, it should take arguments X, n_clusters and a random state and return an initialization." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations over the complete dataset before stopping independently of any early stopping criterion heuristics." - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Size of the mini batches." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity mode." 
- }, - { - "name": "compute_labels", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Compute label assignment and inertia for the complete dataset once the minibatch optimization has converged in fit." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for centroid initialization and random reassignment. Use an int to make the randomness deterministic. See :term:`Glossary `." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Control early stopping based on the relative center changes as measured by a smoothed, variance-normalized of the mean center squared position changes. This early stopping heuristics is closer to the one used for the batch variant of the algorithms but induces a slight computational and memory overhead over the inertia heuristic. To disable convergence detection based on normalized center change, set tol to 0.0 (default)." - }, - { - "name": "max_no_improvement", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Control early stopping based on the consecutive number of mini batches that does not yield an improvement on the smoothed inertia. To disable convergence detection based on inertia, set max_no_improvement to None." - }, - { - "name": "init_size", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to randomly sample for speeding up the initialization (sometimes at the expense of accuracy): the only algorithm is initialized by running a batch KMeans on a random subset of the data. This needs to be larger than n_clusters. If `None`, `init_size= 3 * batch_size`." 
- }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of random initializations that are tried. In contrast to KMeans, the algorithm is only run once, using the best of the ``n_init`` initializations as measured by inertia." - }, - { - "name": "reassignment_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Control the fraction of the maximum number of counts for a center to be reassigned. A higher value means that low count centers are more easily reassigned, which means that the model will take longer to converge, but should converge in a better clustering." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "counts_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "init_size_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "random_state_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster. It must be noted that the data will be converted to C ordering, which will cause a memory copy if the given data is not C-contiguous." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight (default: None). .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the centroids on X by chunking it into mini-batches.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training instances to cluster. It must be noted that the data\n will be converted to C ordering, which will cause a memory copy\n if the given data is not C-contiguous.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight (default: None).\n\n .. versionadded:: 0.20\n\nReturns\n-------\nself" - }, - { - "name": "_labels_inertia_minibatch", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute labels and inertia using mini batches.\n\nThis is slightly slower than doing everything at once but prevents\nmemory errors / segfaults.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nsample_weight : array-like of shape (n_samples,)\n The weights for each observation in X.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels for each point.\n\ninertia : float\n Sum of squared distances of points to nearest cluster." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Coordinates of the data points to cluster. It must be noted that X will be copied if it is not C-contiguous." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight (default: None)." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Update k means estimate on a single mini-batch X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Coordinates of the data points to cluster. It must be noted that\n X will be copied if it is not C-contiguous.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. 
If None, all observations\n are assigned equal weight (default: None).\n\nReturns\n-------\nself" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data to predict." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight (default: None)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the closest cluster each sample in X belongs to.\n\nIn the vector quantization literature, `cluster_centers_` is called\nthe code book and each value returned by `predict` is the index of\nthe closest code in the code book.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight (default: None).\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mini-Batch K-Means clustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nn_clusters : int, default=8\n The number of clusters to form as well as the number of\n centroids to generate.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features), default='k-means++'\n Method for initialization:\n\n 'k-means++' : selects initial cluster centers for k-mean\n clustering in a smart way to speed up convergence. 
See section\n Notes in k_init for more details.\n\n 'random': choose `n_clusters` observations (rows) at random from data\n for the initial centroids.\n\n If an array is passed, it should be of shape (n_clusters, n_features)\n and gives the initial centers.\n\n If a callable is passed, it should take arguments X, n_clusters and a\n random state and return an initialization.\n\nmax_iter : int, default=100\n Maximum number of iterations over the complete dataset before\n stopping independently of any early stopping criterion heuristics.\n\nbatch_size : int, default=100\n Size of the mini batches.\n\nverbose : int, default=0\n Verbosity mode.\n\ncompute_labels : bool, default=True\n Compute label assignment and inertia for the complete dataset\n once the minibatch optimization has converged in fit.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization and\n random reassignment. Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\ntol : float, default=0.0\n Control early stopping based on the relative center changes as\n measured by a smoothed, variance-normalized of the mean center\n squared position changes. 
This early stopping heuristics is\n closer to the one used for the batch variant of the algorithms\n but induces a slight computational and memory overhead over the\n inertia heuristic.\n\n To disable convergence detection based on normalized center\n change, set tol to 0.0 (default).\n\nmax_no_improvement : int, default=10\n Control early stopping based on the consecutive number of mini\n batches that does not yield an improvement on the smoothed inertia.\n\n To disable convergence detection based on inertia, set\n max_no_improvement to None.\n\ninit_size : int, default=None\n Number of samples to randomly sample for speeding up the\n initialization (sometimes at the expense of accuracy): the\n only algorithm is initialized by running a batch KMeans on a\n random subset of the data. This needs to be larger than n_clusters.\n\n If `None`, `init_size= 3 * batch_size`.\n\nn_init : int, default=3\n Number of random initializations that are tried.\n In contrast to KMeans, the algorithm is only run once, using the\n best of the ``n_init`` initializations as measured by inertia.\n\nreassignment_ratio : float, default=0.01\n Control the fraction of the maximum number of counts for a\n center to be reassigned. A higher value means that low count\n centers are more easily reassigned, which means that the\n model will take longer to converge, but should converge in a\n better clustering.\n\nAttributes\n----------\n\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n Coordinates of cluster centers.\n\nlabels_ : int\n Labels of each point (if compute_labels is set to True).\n\ninertia_ : float\n The value of the inertia criterion associated with the chosen\n partition (if compute_labels is set to True). The inertia is\n defined as the sum of square distances of samples to their nearest\n neighbor.\n\nn_iter_ : int\n Number of batches processed.\n\ncounts_ : ndarray of shape (n_clusters,)\n Weigth sum of each cluster.\n\n .. 
deprecated:: 0.24\n This attribute is deprecated in 0.24 and will be removed in\n 1.1 (renaming of 0.26).\n\ninit_size_ : int\n The effective number of samples used for the initialization.\n\n .. deprecated:: 0.24\n This attribute is deprecated in 0.24 and will be removed in\n 1.1 (renaming of 0.26).\n\nSee Also\n--------\nKMeans : The classic implementation of the clustering method based on the\n Lloyd's algorithm. It consumes the whole set of input data at each\n iteration.\n\nNotes\n-----\nSee https://www.eecs.tufts.edu/~dsculley/papers/fastkmeans.pdf\n\nExamples\n--------\n>>> from sklearn.cluster import MiniBatchKMeans\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... [4, 2], [4, 0], [4, 4],\n... [4, 5], [0, 1], [2, 2],\n... [3, 2], [5, 5], [1, -1]])\n>>> # manually fit on batches\n>>> kmeans = MiniBatchKMeans(n_clusters=2,\n... random_state=0,\n... batch_size=6)\n>>> kmeans = kmeans.partial_fit(X[0:6,:])\n>>> kmeans = kmeans.partial_fit(X[6:12,:])\n>>> kmeans.cluster_centers_\narray([[2. , 1. ],\n [3.5, 4.5]])\n>>> kmeans.predict([[0, 0], [4, 4]])\narray([0, 1], dtype=int32)\n>>> # fit on the whole data\n>>> kmeans = MiniBatchKMeans(n_clusters=2,\n... random_state=0,\n... batch_size=6,\n... max_iter=10).fit(X)\n>>> kmeans.cluster_centers_\narray([[3.95918367, 2.40816327],\n [1.12195122, 1.3902439 ]])\n>>> kmeans.predict([[0, 0], [4, 4]])\narray([1, 0], dtype=int32)" - } - ], - "functions": [ - { - "name": "_kmeans_plusplus", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to pick seeds for." - }, - { - "name": "n_clusters", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of seeds to choose." 
- }, - { - "name": "x_squared_norms", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Squared Euclidean norm of each data point." - }, - { - "name": "random_state", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The generator used to initialize the centers. See :term:`Glossary `." - }, - { - "name": "n_local_trials", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of seeding trials for each center (except the first), of which the one reducing inertia the most is greedily chosen. Set to None to make the number of trials depend logarithmically on the number of seeds (2+log(k)); this is the default." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computational component for initialization of n_clusters by\nk-means++. Prior validation of data is assumed.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The data to pick seeds for.\n\nn_clusters : int\n The number of seeds to choose.\n\nx_squared_norms : ndarray of shape (n_samples,)\n Squared Euclidean norm of each data point.\n\nrandom_state : RandomState instance\n The generator used to initialize the centers.\n See :term:`Glossary `.\n\nn_local_trials : int, default=None\n The number of seeding trials for each center (except the first),\n of which the one reducing inertia the most is greedily chosen.\n Set to None to make the number of trials depend logarithmically\n on the number of seeds (2+log(k)); this is the default.\n\nReturns\n-------\ncenters : ndarray of shape (n_clusters, n_features)\n The inital centers for k-means.\n\nindices : ndarray of shape (n_clusters,)\n The index location of the chosen centers in the data array X. For a\n given index and center, X[index] = center." 
- }, - { - "name": "_tolerance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return a tolerance which is independent of the dataset." - }, - { - "name": "k_means", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The observations to cluster. It must be noted that the data will be converted to C ordering, which will cause a memory copy if the given data is not C-contiguous." - }, - { - "name": "n_clusters", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of clusters to form as well as the number of centroids to generate." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight." - }, - { - "name": "init", - "type": "Literal['k-means++', 'random']", - "hasDefault": true, - "default": "'k-means++'", - "limitation": null, - "ignored": false, - "docstring": "Method for initialization: 'k-means++' : selects initial cluster centers for k-mean clustering in a smart way to speed up convergence. See section Notes in k_init for more details. 'random': choose `n_clusters` observations (rows) at random from data for the initial centroids. If an array is passed, it should be of shape (n_clusters, n_features) and gives the initial centers. If a callable is passed, it should take arguments X, n_clusters and a random state and return an initialization." - }, - { - "name": "precompute_distances", - "type": "Literal['auto', True, False]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precompute distances (faster but takes more memory). 
'auto' : do not precompute distances if n_samples * n_clusters > 12 million. This corresponds to about 100MB overhead per job using double precision. True : always precompute distances False : never precompute distances .. deprecated:: 0.23 'precompute_distances' was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25). It has no effect." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations of the k-means algorithm to run." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbosity mode." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance with regards to Frobenius norm of the difference in the cluster centers of two consecutive iterations to declare convergence." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for centroid initialization. Use an int to make the randomness deterministic. See :term:`Glossary `." - }, - { - "name": "copy_x", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "When pre-computing distances it is more numerically accurate to center the data first. If copy_x is True (default), then the original data is not modified. 
If False, the original data is modified, and put back before the function returns, but small numerical differences may be introduced by subtracting and then adding the data mean. Note that if the original data is not C-contiguous, a copy will be made even if copy_x is False. If the original data is sparse, but not in CSR format, a copy will be made even if copy_x is False." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of OpenMP threads to use for the computation. Parallelism is sample-wise on the main cython loop which assigns each sample to its closest center. ``None`` or ``-1`` means using all processors. .. deprecated:: 0.23 ``n_jobs`` was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25)." - }, - { - "name": "algorithm", - "type": "Literal[\"auto\", \"full\", \"elkan\"]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "K-means algorithm to use. The classical EM-style algorithm is \"full\". The \"elkan\" variation is more efficient on data with well-defined clusters, by using the triangle inequality. However it's more memory intensive due to the allocation of an extra array of shape (n_samples, n_clusters). For now \"auto\" (kept for backward compatibiliy) chooses \"elkan\" but it might change in the future for a better heuristic." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "K-means clustering algorithm.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The observations to cluster. 
It must be noted that the data\n will be converted to C ordering, which will cause a memory copy\n if the given data is not C-contiguous.\n\nn_clusters : int\n The number of clusters to form as well as the number of\n centroids to generate.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features), default='k-means++'\n Method for initialization:\n\n 'k-means++' : selects initial cluster centers for k-mean\n clustering in a smart way to speed up convergence. See section\n Notes in k_init for more details.\n\n 'random': choose `n_clusters` observations (rows) at random from data\n for the initial centroids.\n\n If an array is passed, it should be of shape (n_clusters, n_features)\n and gives the initial centers.\n\n If a callable is passed, it should take arguments X, n_clusters and a\n random state and return an initialization.\n\nprecompute_distances : {'auto', True, False}\n Precompute distances (faster but takes more memory).\n\n 'auto' : do not precompute distances if n_samples * n_clusters > 12\n million. This corresponds to about 100MB overhead per job using\n double precision.\n\n True : always precompute distances\n\n False : never precompute distances\n\n .. deprecated:: 0.23\n 'precompute_distances' was deprecated in version 0.23 and will be\n removed in 1.0 (renaming of 0.25). It has no effect.\n\nn_init : int, default=10\n Number of time the k-means algorithm will be run with different\n centroid seeds. 
The final results will be the best output of\n n_init consecutive runs in terms of inertia.\n\nmax_iter : int, default=300\n Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n Verbosity mode.\n\ntol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization. Use\n an int to make the randomness deterministic.\n See :term:`Glossary `.\n\ncopy_x : bool, default=True\n When pre-computing distances it is more numerically accurate to center\n the data first. If copy_x is True (default), then the original data is\n not modified. If False, the original data is modified, and put back\n before the function returns, but small numerical differences may be\n introduced by subtracting and then adding the data mean. Note that if\n the original data is not C-contiguous, a copy will be made even if\n copy_x is False. If the original data is sparse, but not in CSR format,\n a copy will be made even if copy_x is False.\n\nn_jobs : int, default=None\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\n ``None`` or ``-1`` means using all processors.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\nalgorithm : {\"auto\", \"full\", \"elkan\"}, default=\"auto\"\n K-means algorithm to use. The classical EM-style algorithm is \"full\".\n The \"elkan\" variation is more efficient on data with well-defined\n clusters, by using the triangle inequality. 
However it's more memory\n intensive due to the allocation of an extra array of shape\n (n_samples, n_clusters).\n\n For now \"auto\" (kept for backward compatibiliy) chooses \"elkan\" but it\n might change in the future for a better heuristic.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n label[i] is the code or index of the centroid the\n i'th observation is closest to.\n\ninertia : float\n The final value of the inertia criterion (sum of squared distances to\n the closest centroid for all observations in the training set).\n\nbest_n_iter : int\n Number of iterations corresponding to the best results.\n Returned only if `return_n_iter` is set to True." - }, - { - "name": "_kmeans_single_elkan", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The observations to cluster. If sparse matrix, must be in CSR format." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X." - }, - { - "name": "centers_init", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial centers." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations of the k-means algorithm to run." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbosity mode." 
- }, - { - "name": "x_squared_norms", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed x_squared_norms." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance with regards to Frobenius norm of the difference in the cluster centers of two consecutive iterations to declare convergence. It's not advised to set `tol=0` since convergence might never be declared due to rounding errors. Use a very small number instead." - }, - { - "name": "n_threads", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of OpenMP threads to use for the computation. Parallelism is sample-wise on the main cython loop which assigns each sample to its closest center." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "A single run of k-means elkan, assumes preparation completed prior.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The observations to cluster. If sparse matrix, must be in CSR format.\n\nsample_weight : array-like of shape (n_samples,)\n The weights for each observation in X.\n\ncenters_init : ndarray of shape (n_clusters, n_features)\n The initial centers.\n\nmax_iter : int, default=300\n Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n Verbosity mode.\n\nx_squared_norms : array-like, default=None\n Precomputed x_squared_norms.\n\ntol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n It's not advised to set `tol=0` since convergence might never be\n declared due to rounding errors. 
Use a very small number instead.\n\nn_threads : int, default=1\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n label[i] is the code or index of the centroid the\n i'th observation is closest to.\n\ninertia : float\n The final value of the inertia criterion (sum of squared distances to\n the closest centroid for all observations in the training set).\n\nn_iter : int\n Number of iterations run." - }, - { - "name": "_kmeans_single_lloyd", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The observations to cluster. If sparse matrix, must be in CSR format." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X." - }, - { - "name": "centers_init", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial centers." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations of the k-means algorithm to run." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbosity mode" - }, - { - "name": "x_squared_norms", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed x_squared_norms." 
- }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance with regards to Frobenius norm of the difference in the cluster centers of two consecutive iterations to declare convergence. It's not advised to set `tol=0` since convergence might never be declared due to rounding errors. Use a very small number instead." - }, - { - "name": "n_threads", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of OpenMP threads to use for the computation. Parallelism is sample-wise on the main cython loop which assigns each sample to its closest center." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "A single run of k-means lloyd, assumes preparation completed prior.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The observations to cluster. If sparse matrix, must be in CSR format.\n\nsample_weight : ndarray of shape (n_samples,)\n The weights for each observation in X.\n\ncenters_init : ndarray of shape (n_clusters, n_features)\n The initial centers.\n\nmax_iter : int, default=300\n Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n Verbosity mode\n\nx_squared_norms : ndarray of shape (n_samples,), default=None\n Precomputed x_squared_norms.\n\ntol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n It's not advised to set `tol=0` since convergence might never be\n declared due to rounding errors. Use a very small number instead.\n\nn_threads : int, default=1\n The number of OpenMP threads to use for the computation. 
Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n label[i] is the code or index of the centroid the\n i'th observation is closest to.\n\ninertia : float\n The final value of the inertia criterion (sum of squared distances to\n the closest centroid for all observations in the training set).\n\nn_iter : int\n Number of iterations run." - }, - { - "name": "_labels_inertia", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples to assign to the labels. If sparse matrix, must be in CSR format." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X." - }, - { - "name": "x_squared_norms", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed squared euclidean norm of each data point, to speed up computations." - }, - { - "name": "centers", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The cluster centers." - }, - { - "name": "n_threads", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of OpenMP threads to use for the computation. Parallelism is sample-wise on the main cython loop which assigns each sample to its closest center." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "E step of the K-means EM algorithm.\n\nCompute the labels and the inertia of the given samples and centers.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input samples to assign to the labels. If sparse matrix, must\n be in CSR format.\n\nsample_weight : ndarray of shape (n_samples,)\n The weights for each observation in X.\n\nx_squared_norms : ndarray of shape (n_samples,)\n Precomputed squared euclidean norm of each data point, to speed up\n computations.\n\ncenters : ndarray of shape (n_clusters, n_features)\n The cluster centers.\n\nn_threads : int, default=None\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n The resulting assignment.\n\ninertia : float\n Sum of squared distances of samples to their closest cluster center." - }, - { - "name": "_mini_batch_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The original data array." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weights for each observation in X." - }, - { - "name": "x_squared_norms", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Squared euclidean norm of each data point." - }, - { - "name": "centers", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The cluster centers. 
This array is MODIFIED IN PLACE" - }, - { - "name": "old_center_buffer", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Copy of old centers for monitoring convergence." - }, - { - "name": "compute_squared_diff", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If set to False, the squared diff computation is skipped." - }, - { - "name": "distances", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, should be a pre-allocated array that will be used to store the distances of each sample to its closest center. May not be None when random_reassign is True." - }, - { - "name": "random_reassign", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, centers with very low counts are randomly reassigned to observations." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for centroid initialization and to pick new clusters amongst observations with uniform probability. Use an int to make the randomness deterministic. See :term:`Glossary `." - }, - { - "name": "reassignment_ratio", - "type": "float", - "hasDefault": true, - "default": ".", - "limitation": null, - "ignored": false, - "docstring": "Control the fraction of the maximum number of counts for a center to be reassigned. A higher value means that low count centers are more likely to be reassigned, which means that the model will take longer to converge, but should converge in a better clustering." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Incremental update of the centers for the Minibatch K-Means algorithm.\n\nParameters\n----------\n\nX : ndarray of shape (n_samples, n_features)\n The original data array.\n\nsample_weight : array-like of shape (n_samples,)\n The weights for each observation in X.\n\nx_squared_norms : ndarray of shape (n_samples,)\n Squared euclidean norm of each data point.\n\ncenters : ndarray of shape (k, n_features)\n The cluster centers. This array is MODIFIED IN PLACE\n\nold_center_buffer : int\n Copy of old centers for monitoring convergence.\n\ncompute_squared_diff : bool\n If set to False, the squared diff computation is skipped.\n\ndistances : ndarray of shape (n_samples,), dtype=float, default=None\n If not None, should be a pre-allocated array that will be used to store\n the distances of each sample to its closest center.\n May not be None when random_reassign is True.\n\nrandom_reassign : bool, default=False\n If True, centers with very low counts are randomly reassigned\n to observations.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization and to\n pick new clusters amongst observations with uniform probability. Use\n an int to make the randomness deterministic.\n See :term:`Glossary `.\n\nreassignment_ratio : float, default=.01\n Control the fraction of the maximum number of counts for a\n center to be reassigned. A higher value means that low count\n centers are more likely to be reassigned, which means that the\n model will take longer to converge, but should converge in a\n better clustering.\n\nverbose : bool, default=False\n Controls the verbosity.\n\nReturns\n-------\ninertia : float\n Sum of squared distances of samples to their closest cluster center.\n\nsquared_diff : ndarray of shape (n_clusters,)\n Squared distances between previous and updated cluster centers." 
- }, - { - "name": "_mini_batch_convergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper function to encapsulate the early stopping logic." - }, - { - "name": "kmeans_plusplus", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to pick seeds from." - }, - { - "name": "n_clusters", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of centroids to initialize" - }, - { - "name": "x_squared_norms", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Squared Euclidean norm of each data point." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for centroid initialization. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "n_local_trials", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of seeding trials for each center (except the first), of which the one reducing inertia the most is greedily chosen. Set to None to make the number of trials depend logarithmically on the number of seeds (2+log(k))." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Init n_clusters seeds according to k-means++\n\n.. 
versionadded:: 0.24\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to pick seeds from.\n\nn_clusters : int\n The number of centroids to initialize\n\nx_squared_norms : array-like of shape (n_samples,), default=None\n Squared Euclidean norm of each data point.\n\nrandom_state : int or RandomState instance, default=None\n Determines random number generation for centroid initialization. Pass\n an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_local_trials : int, default=None\n The number of seeding trials for each center (except the first),\n of which the one reducing inertia the most is greedily chosen.\n Set to None to make the number of trials depend logarithmically\n on the number of seeds (2+log(k)).\n\nReturns\n-------\ncenters : ndarray of shape (n_clusters, n_features)\n The inital centers for k-means.\n\nindices : ndarray of shape (n_clusters,)\n The index location of the chosen centers in the data array X. For a\n given index and center, X[index] = center.\n\nNotes\n-----\nSelects initial cluster centers for k-mean clustering in a smart way\nto speed up convergence. see: Arthur, D. and Vassilvitskii, S.\n\"k-means++: the advantages of careful seeding\". ACM-SIAM symposium\non Discrete algorithms. 2007\n\nExamples\n--------\n\n>>> from sklearn.cluster import kmeans_plusplus\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... 
[10, 2], [10, 4], [10, 0]])\n>>> centers, indices = kmeans_plusplus(X, n_clusters=2, random_state=0)\n>>> centers\narray([[10, 4],\n [ 1, 0]])\n>>> indices\narray([4, 2])" - } - ] - }, - { - "name": "sklearn.cluster._mean_shift", - "imports": [ - "import numpy as np", - "import warnings", - "from joblib import Parallel", - "from collections import defaultdict", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from utils import check_random_state", - "from utils import gen_batches", - "from utils import check_array", - "from base import BaseEstimator", - "from base import ClusterMixin", - "from neighbors import NearestNeighbors", - "from metrics.pairwise import pairwise_distances_argmin", - "from _config import config_context" - ], - "classes": [ - { - "name": "MeanShift", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "bandwidth", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Bandwidth used in the RBF kernel. If not given, the bandwidth is estimated using sklearn.cluster.estimate_bandwidth; see the documentation for that function for hints on scalability (see also the Notes, below)." - }, - { - "name": "seeds", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Seeds used to initialize kernels. If not set, the seeds are calculated by clustering.get_bin_seeds with bandwidth as the grid size and default values for other parameters." 
- }, - { - "name": "bin_seeding", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, initial kernel locations are not locations of all points, but rather the location of the discretized version of points, where points are binned onto a grid whose coarseness corresponds to the bandwidth. Setting this option to True will speed up the algorithm because fewer seeds will be initialized. The default value is False. Ignored if seeds argument is not None." - }, - { - "name": "min_bin_freq", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "To speed up the algorithm, accept only those bins with at least min_bin_freq points as seeds." - }, - { - "name": "cluster_all", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If true, then all points are clustered, even those orphans that are not within any kernel. Orphans are assigned to the nearest kernel. If false, then orphans are given cluster label -1." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This works by computing each of the n_init runs in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations, per seed point before the clustering operation terminates (for that seed point), if has not converged yet. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples to cluster." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform clustering.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Samples to cluster.\n\ny : Ignored" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data to predict." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the closest cluster each sample in X belongs to.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to." - } - ], - "docstring": "Mean shift clustering using a flat kernel.\n\nMean shift clustering aims to discover \"blobs\" in a smooth density of\nsamples. It is a centroid-based algorithm, which works by updating\ncandidates for centroids to be the mean of the points within a given\nregion. 
These candidates are then filtered in a post-processing stage to\neliminate near-duplicates to form the final set of centroids.\n\nSeeding is performed using a binning technique for scalability.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nbandwidth : float, default=None\n Bandwidth used in the RBF kernel.\n\n If not given, the bandwidth is estimated using\n sklearn.cluster.estimate_bandwidth; see the documentation for that\n function for hints on scalability (see also the Notes, below).\n\nseeds : array-like of shape (n_samples, n_features), default=None\n Seeds used to initialize kernels. If not set,\n the seeds are calculated by clustering.get_bin_seeds\n with bandwidth as the grid size and default values for\n other parameters.\n\nbin_seeding : bool, default=False\n If true, initial kernel locations are not locations of all\n points, but rather the location of the discretized version of\n points, where points are binned onto a grid whose coarseness\n corresponds to the bandwidth. Setting this option to True will speed\n up the algorithm because fewer seeds will be initialized.\n The default value is False.\n Ignored if seeds argument is not None.\n\nmin_bin_freq : int, default=1\n To speed up the algorithm, accept only those bins with at least\n min_bin_freq points as seeds.\n\ncluster_all : bool, default=True\n If true, then all points are clustered, even those orphans that are\n not within any kernel. Orphans are assigned to the nearest kernel.\n If false, then orphans are given cluster label -1.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by computing\n each of the n_init runs in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nmax_iter : int, default=300\n Maximum number of iterations, per seed point before the clustering\n operation terminates (for that seed point), if has not converged yet.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n Coordinates of cluster centers.\n\nlabels_ : ndarray of shape (n_samples,)\n Labels of each point.\n\nn_iter_ : int\n Maximum number of iterations performed on each seed.\n\n .. versionadded:: 0.22\n\nExamples\n--------\n>>> from sklearn.cluster import MeanShift\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n... [4, 7], [3, 5], [3, 6]])\n>>> clustering = MeanShift(bandwidth=2).fit(X)\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])\n>>> clustering.predict([[0, 0], [5, 5]])\narray([1, 0])\n>>> clustering\nMeanShift(bandwidth=2)\n\nNotes\n-----\n\nScalability:\n\nBecause this implementation uses a flat kernel and\na Ball Tree to look up members of each kernel, the complexity will tend\ntowards O(T*n*log(n)) in lower dimensions, with n the number of samples\nand T the number of points. In higher dimensions the complexity will\ntend towards O(T*n^2).\n\nScalability can be boosted by using fewer seeds, for example by using\na higher value of min_bin_freq in the get_bin_seeds function.\n\nNote that the estimate_bandwidth function is much less scalable than the\nmean shift algorithm and will be the bottleneck if it is used.\n\nReferences\n----------\n\nDorin Comaniciu and Peter Meer, \"Mean Shift: A robust approach toward\nfeature space analysis\". IEEE Transactions on Pattern Analysis and\nMachine Intelligence. 2002. pp. 603-619." - } - ], - "functions": [ - { - "name": "estimate_bandwidth", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input points." 
- }, - { - "name": "quantile", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "should be between [0, 1] 0.5 means that the median of all pairwise distances is used." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of samples to use. If not given, all samples are used." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The generator used to randomly select the samples from input points for bandwidth estimation. Use an int to make the randomness deterministic. See :term:`Glossary `." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the bandwidth to use with the mean-shift algorithm.\n\nThat this function takes time at least quadratic in n_samples. For large\ndatasets, it's wise to set that parameter to a small value.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input points.\n\nquantile : float, default=0.3\n should be between [0, 1]\n 0.5 means that the median of all pairwise distances is used.\n\nn_samples : int, default=None\n The number of samples to use. If not given, all samples are used.\n\nrandom_state : int, RandomState instance, default=None\n The generator used to randomly select the samples from input points\n for bandwidth estimation. 
Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nReturns\n-------\nbandwidth : float\n The bandwidth parameter." - }, - { - "name": "_mean_shift_single_seed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "mean_shift", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "bandwidth", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Kernel bandwidth. If bandwidth is not given, it is determined using a heuristic based on the median of all pairwise distances. This will take quadratic time in the number of samples. The sklearn.cluster.estimate_bandwidth function can be used to do this more efficiently." - }, - { - "name": "seeds", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Point used as initial kernel locations. If None and bin_seeding=False, each data point is used as a seed. If None and bin_seeding=True, see bin_seeding." - }, - { - "name": "bin_seeding", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, initial kernel locations are not locations of all points, but rather the location of the discretized version of points, where points are binned onto a grid whose coarseness corresponds to the bandwidth. Setting this option to True will speed up the algorithm because fewer seeds will be initialized. Ignored if seeds argument is not None." 
- }, - { - "name": "min_bin_freq", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "To speed up the algorithm, accept only those bins with at least min_bin_freq points as seeds." - }, - { - "name": "cluster_all", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If true, then all points are clustered, even those orphans that are not within any kernel. Orphans are assigned to the nearest kernel. If false, then orphans are given cluster label -1." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations, per seed point before the clustering operation terminates (for that seed point), if has not converged yet." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This works by computing each of the n_init runs in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionadded:: 0.17 Parallel Execution using *n_jobs*." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform mean shift clustering of data using a flat kernel.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nbandwidth : float, default=None\n Kernel bandwidth.\n\n If bandwidth is not given, it is determined using a heuristic based on\n the median of all pairwise distances. This will take quadratic time in\n the number of samples. 
The sklearn.cluster.estimate_bandwidth function\n can be used to do this more efficiently.\n\nseeds : array-like of shape (n_seeds, n_features) or None\n Point used as initial kernel locations. If None and bin_seeding=False,\n each data point is used as a seed. If None and bin_seeding=True,\n see bin_seeding.\n\nbin_seeding : bool, default=False\n If true, initial kernel locations are not locations of all\n points, but rather the location of the discretized version of\n points, where points are binned onto a grid whose coarseness\n corresponds to the bandwidth. Setting this option to True will speed\n up the algorithm because fewer seeds will be initialized.\n Ignored if seeds argument is not None.\n\nmin_bin_freq : int, default=1\n To speed up the algorithm, accept only those bins with at least\n min_bin_freq points as seeds.\n\ncluster_all : bool, default=True\n If true, then all points are clustered, even those orphans that are\n not within any kernel. Orphans are assigned to the nearest kernel.\n If false, then orphans are given cluster label -1.\n\nmax_iter : int, default=300\n Maximum number of iterations, per seed point before the clustering\n operation terminates (for that seed point), if has not converged yet.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by computing\n each of the n_init runs in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.17\n Parallel Execution using *n_jobs*.\n\nReturns\n-------\n\ncluster_centers : ndarray of shape (n_clusters, n_features)\n Coordinates of cluster centers.\n\nlabels : ndarray of shape (n_samples,)\n Cluster labels for each point.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_mean_shift.py\n`." 
- }, - { - "name": "get_bin_seeds", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input points, the same points that will be used in mean_shift." - }, - { - "name": "bin_size", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the coarseness of the binning. Smaller values lead to more seeding (which is computationally more expensive). If you're not sure how to set this, set it to the value of the bandwidth used in clustering.mean_shift." - }, - { - "name": "min_bin_freq", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Only bins with at least min_bin_freq will be selected as seeds. Raising this value decreases the number of seeds found, which makes mean_shift computationally cheaper." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Finds seeds for mean_shift.\n\nFinds seeds by first binning data onto a grid whose lines are\nspaced bin_size apart, and then choosing those bins with at least\nmin_bin_freq points.\n\nParameters\n----------\n\nX : array-like of shape (n_samples, n_features)\n Input points, the same points that will be used in mean_shift.\n\nbin_size : float\n Controls the coarseness of the binning. Smaller values lead\n to more seeding (which is computationally more expensive). If you're\n not sure how to set this, set it to the value of the bandwidth used\n in clustering.mean_shift.\n\nmin_bin_freq : int, default=1\n Only bins with at least min_bin_freq will be selected as seeds.\n Raising this value decreases the number of seeds found, which\n makes mean_shift computationally cheaper.\n\nReturns\n-------\nbin_seeds : array-like of shape (n_samples, n_features)\n Points used as initial kernel positions in clustering.mean_shift." 
- } - ] - }, - { - "name": "sklearn.cluster._optics", - "imports": [ - "import warnings", - "import numpy as np", - "from utils import gen_batches", - "from utils import get_chunk_n_rows", - "from utils.validation import _deprecate_positional_args", - "from neighbors import NearestNeighbors", - "from base import BaseEstimator", - "from base import ClusterMixin", - "from metrics import pairwise_distances" - ], - "classes": [ - { - "name": "OPTICS", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "min_samples", - "type": null, - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "The number of samples in a neighborhood for a point to be considered as a core point. Also, up and down steep regions can't have more than ``min_samples`` consecutive non-steep points. Expressed as an absolute number or a fraction of the number of samples (rounded to be at least 2)." - }, - { - "name": "max_eps", - "type": "float", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "The maximum distance between two samples for one to be considered as in the neighborhood of the other. Default value of ``np.inf`` will identify clusters across all scales; reducing ``max_eps`` will result in shorter run times." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "Metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. If metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays as input and return one value indicating the distance between them. This works for Scipy's metrics, but is less efficient than passing the metric name as a string. 
If metric is \"precomputed\", X is assumed to be a distance matrix and must be square. Valid values for metric are: - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'] - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'] See the documentation for scipy.spatial.distance for details on these metrics." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Parameter for the Minkowski metric from :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "cluster_method", - "type": "str", - "hasDefault": true, - "default": "'xi'", - "limitation": null, - "ignored": false, - "docstring": "The extraction method used to extract clusters using the calculated reachability and ordering. Possible values are \"xi\" and \"dbscan\"." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum distance between two samples for one to be considered as in the neighborhood of the other. By default it assumes the same value as ``max_eps``. Used only when ``cluster_method='dbscan'``." 
- }, - { - "name": "xi", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Determines the minimum steepness on the reachability plot that constitutes a cluster boundary. For example, an upwards point in the reachability plot is defined by the ratio from one point to its successor being at most 1-xi. Used only when ``cluster_method='xi'``." - }, - { - "name": "predecessor_correction", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Correct clusters according to the predecessors calculated by OPTICS [2]_. This parameter has minimal effect on most datasets. Used only when ``cluster_method='xi'``." - }, - { - "name": "min_cluster_size", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Minimum number of samples in an OPTICS cluster, expressed as an absolute number or a fraction of the number of samples (rounded to be at least 2). If ``None``, the value of ``min_samples`` is used instead. Used only when ``cluster_method='xi'``." - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. (default) Note: fitting on sparse input will override the setting of this parameter, using brute force." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to :class:`BallTree` or :class:`KDTree`. 
This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A feature array, or array of distances between samples if metric='precomputed'." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Perform OPTICS clustering.\n\nExtracts an ordered list of points and reachability distances, and\nperforms initial clustering using ``max_eps`` distance specified at\nOPTICS object instantiation.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features), or (n_samples, n_samples) if metric=\u2019precomputed\u2019\n A feature array, or array of distances between samples if\n metric='precomputed'.\n\ny : ignored\n Ignored.\n\nReturns\n-------\nself : instance of OPTICS\n The instance." - } - ], - "docstring": "Estimate clustering structure from vector array.\n\nOPTICS (Ordering Points To Identify the Clustering Structure), closely\nrelated to DBSCAN, finds core sample of high density and expands clusters\nfrom them [1]_. Unlike DBSCAN, keeps cluster hierarchy for a variable\nneighborhood radius. 
Better suited for usage on large datasets than the\ncurrent sklearn implementation of DBSCAN.\n\nClusters are then extracted using a DBSCAN-like method\n(cluster_method = 'dbscan') or an automatic\ntechnique proposed in [1]_ (cluster_method = 'xi').\n\nThis implementation deviates from the original OPTICS by first performing\nk-nearest-neighborhood searches on all points to identify core sizes, then\ncomputing only the distances to unprocessed points when constructing the\ncluster order. Note that we do not employ a heap to manage the expansion\ncandidates, so the time complexity will be O(n^2).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nmin_samples : int > 1 or float between 0 and 1, default=5\n The number of samples in a neighborhood for a point to be considered as\n a core point. Also, up and down steep regions can't have more than\n ``min_samples`` consecutive non-steep points. Expressed as an absolute\n number or a fraction of the number of samples (rounded to be at least\n 2).\n\nmax_eps : float, default=np.inf\n The maximum distance between two samples for one to be considered as\n in the neighborhood of the other. Default value of ``np.inf`` will\n identify clusters across all scales; reducing ``max_eps`` will result\n in shorter run times.\n\nmetric : str or callable, default='minkowski'\n Metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string. 
If metric is\n \"precomputed\", X is assumed to be a distance matrix and must be square.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\np : int, default=2\n Parameter for the Minkowski metric from\n :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\ncluster_method : str, default='xi'\n The extraction method used to extract clusters using the calculated\n reachability and ordering. Possible values are \"xi\" and \"dbscan\".\n\neps : float, default=None\n The maximum distance between two samples for one to be considered as\n in the neighborhood of the other. By default it assumes the same value\n as ``max_eps``.\n Used only when ``cluster_method='dbscan'``.\n\nxi : float between 0 and 1, default=0.05\n Determines the minimum steepness on the reachability plot that\n constitutes a cluster boundary. For example, an upwards point in the\n reachability plot is defined by the ratio from one point to its\n successor being at most 1-xi.\n Used only when ``cluster_method='xi'``.\n\npredecessor_correction : bool, default=True\n Correct clusters according to the predecessors calculated by OPTICS\n [2]_. 
This parameter has minimal effect on most datasets.\n Used only when ``cluster_method='xi'``.\n\nmin_cluster_size : int > 1 or float between 0 and 1, default=None\n Minimum number of samples in an OPTICS cluster, expressed as an\n absolute number or a fraction of the number of samples (rounded to be\n at least 2). If ``None``, the value of ``min_samples`` is used instead.\n Used only when ``cluster_method='xi'``.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method. (default)\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n affect the speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nlabels_ : ndarray of shape (n_samples,)\n Cluster labels for each point in the dataset given to fit().\n Noisy samples and points which are not included in a leaf cluster\n of ``cluster_hierarchy_`` are labeled as -1.\n\nreachability_ : ndarray of shape (n_samples,)\n Reachability distances per sample, indexed by object order. 
Use\n ``clust.reachability_[clust.ordering_]`` to access in cluster order.\n\nordering_ : ndarray of shape (n_samples,)\n The cluster ordered list of sample indices.\n\ncore_distances_ : ndarray of shape (n_samples,)\n Distance at which each sample becomes a core point, indexed by object\n order. Points which will never be core have a distance of inf. Use\n ``clust.core_distances_[clust.ordering_]`` to access in cluster order.\n\npredecessor_ : ndarray of shape (n_samples,)\n Point that a sample was reached from, indexed by object order.\n Seed points have a predecessor of -1.\n\ncluster_hierarchy_ : ndarray of shape (n_clusters, 2)\n The list of clusters in the form of ``[start, end]`` in each row, with\n all indices inclusive. The clusters are ordered according to\n ``(end, -start)`` (ascending) so that larger clusters encompassing\n smaller clusters come after those smaller ones. Since ``labels_`` does\n not reflect the hierarchy, usually\n ``len(cluster_hierarchy_) > np.unique(optics.labels_)``. Please also\n note that these indices are of the ``ordering_``, i.e.\n ``X[ordering_][start:end + 1]`` form a cluster.\n Only available when ``cluster_method='xi'``.\n\nSee Also\n--------\nDBSCAN : A similar clustering for a specified neighborhood radius (eps).\n Our implementation is optimized for runtime.\n\nReferences\n----------\n.. [1] Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel,\n and J\u00f6rg Sander. \"OPTICS: ordering points to identify the clustering\n structure.\" ACM SIGMOD Record 28, no. 2 (1999): 49-60.\n\n.. [2] Schubert, Erich, Michael Gertz.\n \"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. of\n the Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329.\n\nExamples\n--------\n>>> from sklearn.cluster import OPTICS\n>>> import numpy as np\n>>> X = np.array([[1, 2], [2, 5], [3, 6],\n... 
[8, 7], [8, 8], [7, 3]])\n>>> clustering = OPTICS(min_samples=2).fit(X)\n>>> clustering.labels_\narray([0, 0, 0, 1, 1, 1])" - } - ], - "functions": [ - { - "name": "_validate_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_compute_core_distances_", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - }, - { - "name": "neighbors", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The fitted nearest neighbors estimator." - }, - { - "name": "working_memory", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sought maximum memory for temporary distance matrix chunks. When None (default), the value of ``sklearn.get_config()['working_memory']`` is used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the k-th nearest neighbor of each sample\n\nEquivalent to neighbors.kneighbors(X, self.min_samples)[0][:, -1]\nbut with more memory efficiency.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data.\nneighbors : NearestNeighbors instance\n The fitted nearest neighbors estimator.\nworking_memory : int, default=None\n The sought maximum memory for temporary distance matrix chunks.\n When None (default), the value of\n ``sklearn.get_config()['working_memory']`` is used.\n\nReturns\n-------\ncore_distances : ndarray of shape (n_samples,)\n Distance at which each sample becomes a core point.\n Points which will never be core have a distance of inf." 
- }, - { - "name": "compute_optics_graph", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A feature array, or array of distances between samples if metric='precomputed'" - }, - { - "name": "min_samples", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of samples in a neighborhood for a point to be considered as a core point. Expressed as an absolute number or a fraction of the number of samples (rounded to be at least 2)." - }, - { - "name": "max_eps", - "type": "float", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "The maximum distance between two samples for one to be considered as in the neighborhood of the other. Default value of ``np.inf`` will identify clusters across all scales; reducing ``max_eps`` will result in shorter run times." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "Metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. If metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays as input and return one value indicating the distance between them. This works for Scipy's metrics, but is less efficient than passing the metric name as a string. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square. 
Valid values for metric are: - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'] - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'] See the documentation for scipy.spatial.distance for details on these metrics." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Parameter for the Minkowski metric from :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. (default) Note: fitting on sparse input will override the setting of this parameter, using brute force." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." 
- }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the OPTICS reachability graph.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features), or (n_samples, n_samples) if metric=\u2019precomputed\u2019.\n A feature array, or array of distances between samples if\n metric='precomputed'\n\nmin_samples : int > 1 or float between 0 and 1\n The number of samples in a neighborhood for a point to be considered\n as a core point. Expressed as an absolute number or a fraction of the\n number of samples (rounded to be at least 2).\n\nmax_eps : float, default=np.inf\n The maximum distance between two samples for one to be considered as\n in the neighborhood of the other. Default value of ``np.inf`` will\n identify clusters across all scales; reducing ``max_eps`` will result\n in shorter run times.\n\nmetric : str or callable, default='minkowski'\n Metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string. 
If metric is\n \"precomputed\", X is assumed to be a distance matrix and must be square.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\np : int, default=2\n Parameter for the Minkowski metric from\n :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method. (default)\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n affect the speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nReturns\n-------\nordering_ : array of shape (n_samples,)\n The cluster ordered list of sample indices.\n\ncore_distances_ : array of shape (n_samples,)\n Distance at which each sample becomes a core point, indexed by object\n order. Points which will never be core have a distance of inf. Use\n ``clust.core_distances_[clust.ordering_]`` to access in cluster order.\n\nreachability_ : array of shape (n_samples,)\n Reachability distances per sample, indexed by object order. Use\n ``clust.reachability_[clust.ordering_]`` to access in cluster order.\n\npredecessor_ : array of shape (n_samples,)\n Point that a sample was reached from, indexed by object order.\n Seed points have a predecessor of -1.\n\nReferences\n----------\n.. [1] Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel,\n and J\u00f6rg Sander. \"OPTICS: ordering points to identify the clustering\n structure.\" ACM SIGMOD Record 28, no. 2 (1999): 49-60." - }, - { - "name": "_set_reach_dist", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "cluster_optics_dbscan", - "decorators": [], - "parameters": [ - { - "name": "reachability", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Reachability distances calculated by OPTICS (``reachability_``)" - }, - { - "name": "core_distances", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Distances at which points become core (``core_distances_``)" - }, - { - "name": "ordering", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "OPTICS ordered point indices (``ordering_``)" - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "DBSCAN ``eps`` 
parameter. Must be set to < ``max_eps``. Results will be close to DBSCAN algorithm if ``eps`` and ``max_eps`` are close to one another." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Performs DBSCAN extraction for an arbitrary epsilon.\n\nExtracting the clusters runs in linear time. Note that this results in\n``labels_`` which are close to a :class:`~sklearn.cluster.DBSCAN` with\nsimilar settings and ``eps``, only if ``eps`` is close to ``max_eps``.\n\nParameters\n----------\nreachability : array of shape (n_samples,)\n Reachability distances calculated by OPTICS (``reachability_``)\n\ncore_distances : array of shape (n_samples,)\n Distances at which points become core (``core_distances_``)\n\nordering : array of shape (n_samples,)\n OPTICS ordered point indices (``ordering_``)\n\neps : float\n DBSCAN ``eps`` parameter. Must be set to < ``max_eps``. Results\n will be close to DBSCAN algorithm if ``eps`` and ``max_eps`` are close\n to one another.\n\nReturns\n-------\nlabels_ : array of shape (n_samples,)\n The estimated labels." - }, - { - "name": "cluster_optics_xi", - "decorators": [], - "parameters": [ - { - "name": "reachability", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Reachability distances calculated by OPTICS (`reachability_`)" - }, - { - "name": "predecessor", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predecessors calculated by OPTICS." - }, - { - "name": "ordering", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "OPTICS ordered point indices (`ordering_`)" - }, - { - "name": "min_samples", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The same as the min_samples given to OPTICS. 
Up and down steep regions can't have more then ``min_samples`` consecutive non-steep points. Expressed as an absolute number or a fraction of the number of samples (rounded to be at least 2)." - }, - { - "name": "min_cluster_size", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Minimum number of samples in an OPTICS cluster, expressed as an absolute number or a fraction of the number of samples (rounded to be at least 2). If ``None``, the value of ``min_samples`` is used instead." - }, - { - "name": "xi", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Determines the minimum steepness on the reachability plot that constitutes a cluster boundary. For example, an upwards point in the reachability plot is defined by the ratio from one point to its successor being at most 1-xi." - }, - { - "name": "predecessor_correction", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Correct clusters based on the calculated predecessors." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Automatically extract clusters according to the Xi-steep method.\n\nParameters\n----------\nreachability : ndarray of shape (n_samples,)\n Reachability distances calculated by OPTICS (`reachability_`)\n\npredecessor : ndarray of shape (n_samples,)\n Predecessors calculated by OPTICS.\n\nordering : ndarray of shape (n_samples,)\n OPTICS ordered point indices (`ordering_`)\n\nmin_samples : int > 1 or float between 0 and 1\n The same as the min_samples given to OPTICS. 
Up and down steep regions\n can't have more then ``min_samples`` consecutive non-steep points.\n Expressed as an absolute number or a fraction of the number of samples\n (rounded to be at least 2).\n\nmin_cluster_size : int > 1 or float between 0 and 1, default=None\n Minimum number of samples in an OPTICS cluster, expressed as an\n absolute number or a fraction of the number of samples (rounded to be\n at least 2). If ``None``, the value of ``min_samples`` is used instead.\n\nxi : float between 0 and 1, default=0.05\n Determines the minimum steepness on the reachability plot that\n constitutes a cluster boundary. For example, an upwards point in the\n reachability plot is defined by the ratio from one point to its\n successor being at most 1-xi.\n\npredecessor_correction : bool, default=True\n Correct clusters based on the calculated predecessors.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n The labels assigned to samples. Points which are not included\n in any cluster are labeled as -1.\n\nclusters : ndarray of shape (n_clusters, 2)\n The list of clusters in the form of ``[start, end]`` in each row, with\n all indices inclusive. The clusters are ordered according to ``(end,\n -start)`` (ascending) so that larger clusters encompassing smaller\n clusters come after such nested smaller clusters. Since ``labels`` does\n not reflect the hierarchy, usually ``len(clusters) >\n np.unique(labels)``." - }, - { - "name": "_extend_region", - "decorators": [], - "parameters": [ - { - "name": "steep_point", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True if the point is steep downward (upward)." - }, - { - "name": "xward_point", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True if the point is an upward (respectively downward) point." 
- }, - { - "name": "start", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The start of the xward region." - }, - { - "name": "min_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The same as the min_samples given to OPTICS. Up and down steep regions can't have more then ``min_samples`` consecutive non-steep points." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Extend the area until it's maximal.\n\nIt's the same function for both upward and downward reagions, depending on\nthe given input parameters. Assuming:\n\n - steep_{upward/downward}: bool array indicating whether a point is a\n steep {upward/downward};\n - upward/downward: bool array indicating whether a point is\n upward/downward;\n\nTo extend an upward reagion, ``steep_point=steep_upward`` and\n``xward_point=downward`` are expected, and to extend a downward region,\n``steep_point=steep_downward`` and ``xward_point=upward``.\n\nParameters\n----------\nsteep_point : ndarray of shape (n_samples,), dtype=bool\n True if the point is steep downward (upward).\n\nxward_point : ndarray of shape (n_samples,), dtype=bool\n True if the point is an upward (respectively downward) point.\n\nstart : int\n The start of the xward region.\n\nmin_samples : int\n The same as the min_samples given to OPTICS. Up and down steep\n regions can't have more then ``min_samples`` consecutive non-steep\n points.\n\nReturns\n-------\nindex : int\n The current index iterating over all the samples, i.e. where we are up\n to in our search.\n\nend : int\n The end of the region, which can be behind the index. The region\n includes the ``end`` index." 
- }, - { - "name": "_update_filter_sdas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update steep down areas (SDAs) using the new maximum in between (mib)\nvalue, and the given complement of xi, i.e. ``1 - xi``." - }, - { - "name": "_correct_predecessor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Correct for predecessors.\n\nApplies Algorithm 2 of [1]_.\n\nInput parameters are ordered by the computer OPTICS ordering.\n\n.. [1] Schubert, Erich, Michael Gertz.\n \"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. of\n the Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329." - }, - { - "name": "_xi_cluster", - "decorators": [], - "parameters": [ - { - "name": "reachability_plot", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The reachability plot, i.e. reachability ordered according to the calculated ordering, all computed by OPTICS." - }, - { - "name": "predecessor_plot", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predecessors ordered according to the calculated ordering." - }, - { - "name": "xi", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the minimum steepness on the reachability plot that constitutes a cluster boundary. For example, an upwards point in the reachability plot is defined by the ratio from one point to its successor being at most 1-xi." - }, - { - "name": "min_samples", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The same as the min_samples given to OPTICS. Up and down steep regions can't have more then ``min_samples`` consecutive non-steep points." 
- }, - { - "name": "min_cluster_size", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Minimum number of samples in an OPTICS cluster." - }, - { - "name": "predecessor_correction", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Correct clusters based on the calculated predecessors." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Automatically extract clusters according to the Xi-steep method.\n\nThis is rouphly an implementation of Figure 19 of the OPTICS paper.\n\nParameters\n----------\nreachability_plot : array-like of shape (n_samples,)\n The reachability plot, i.e. reachability ordered according to\n the calculated ordering, all computed by OPTICS.\n\npredecessor_plot : array-like of shape (n_samples,)\n Predecessors ordered according to the calculated ordering.\n\nxi : float, between 0 and 1\n Determines the minimum steepness on the reachability plot that\n constitutes a cluster boundary. For example, an upwards point in the\n reachability plot is defined by the ratio from one point to its\n successor being at most 1-xi.\n\nmin_samples : int > 1\n The same as the min_samples given to OPTICS. Up and down steep regions\n can't have more then ``min_samples`` consecutive non-steep points.\n\nmin_cluster_size : int > 1\n Minimum number of samples in an OPTICS cluster.\n\npredecessor_correction : bool\n Correct clusters based on the calculated predecessors.\n\nReturns\n-------\nclusters : ndarray of shape (n_clusters, 2)\n The list of clusters in the form of [start, end] in each row, with all\n indices inclusive. The clusters are ordered in a way that larger\n clusters encompassing smaller clusters come after those smaller\n clusters." 
- }, - { - "name": "_extract_xi_labels", - "decorators": [], - "parameters": [ - { - "name": "ordering", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ordering of points calculated by OPTICS" - }, - { - "name": "clusters", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of clusters i.e. (start, end) tuples, as returned by `_xi_cluster`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Extracts the labels from the clusters returned by `_xi_cluster`.\nWe rely on the fact that clusters are stored\nwith the smaller clusters coming before the larger ones.\n\nParameters\n----------\nordering : array-like of shape (n_samples,)\n The ordering of points calculated by OPTICS\n\nclusters : array-like of shape (n_clusters, 2)\n List of clusters i.e. (start, end) tuples,\n as returned by `_xi_cluster`.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)" - } - ] - }, - { - "name": "sklearn.cluster._spectral", - "imports": [ - "import warnings", - "import numpy as np", - "from base import BaseEstimator", - "from base import ClusterMixin", - "from utils import check_random_state", - "from utils import as_float_array", - "from utils.validation import _deprecate_positional_args", - "from utils.deprecation import deprecated", - "from metrics.pairwise import pairwise_kernels", - "from neighbors import kneighbors_graph", - "from neighbors import NearestNeighbors", - "from manifold import spectral_embedding", - "from _kmeans import k_means", - "from scipy.sparse import csc_matrix", - "from scipy.linalg import LinAlgError" - ], - "classes": [ - { - "name": "SpectralClustering", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "8", - 
"limitation": null, - "ignored": false, - "docstring": "The dimension of the projection subspace." - }, - { - "name": "eigen_solver", - "type": "Literal['arpack', 'lobpcg', 'amg']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The eigenvalue decomposition strategy to use. AMG requires pyamg to be installed. It can be faster on very large, sparse problems, but may also lead to instabilities. If None, then ``'arpack'`` is used." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "n_clusters", - "limitation": null, - "ignored": false, - "docstring": "Number of eigen vectors to use for the spectral embedding" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A pseudo random number generator used for the initialization of the lobpcg eigen vectors decomposition when ``eigen_solver='amg'`` and by the K-Means initialization. Use an int to make the randomness deterministic. See :term:`Glossary `." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia." - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels. Ignored for ``affinity='nearest_neighbors'``." - }, - { - "name": "affinity", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "How to construct the affinity matrix. - 'nearest_neighbors' : construct the affinity matrix by computing a graph of nearest neighbors. 
- 'rbf' : construct the affinity matrix using a radial basis function (RBF) kernel. - 'precomputed' : interpret ``X`` as a precomputed affinity matrix. - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph of precomputed nearest neighbors, and constructs the affinity matrix by selecting the ``n_neighbors`` nearest neighbors. - one of the kernels supported by :func:`~sklearn.metrics.pairwise_kernels`. Only kernels that produce similarity scores (non-negative values that increase with similarity) should be used. This property is not checked by the clustering algorithm." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to use when constructing the affinity matrix using the nearest neighbors method. Ignored for ``affinity='rbf'``." - }, - { - "name": "eigen_tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion for eigendecomposition of the Laplacian matrix when ``eigen_solver='arpack'``." - }, - { - "name": "assign_labels", - "type": "Literal['kmeans', 'discretize']", - "hasDefault": true, - "default": "'kmeans'", - "limitation": null, - "ignored": false, - "docstring": "The strategy to use to assign labels in the embedding space. There are two ways to assign labels after the laplacian embedding. k-means can be applied and is a popular choice. But it can also be sensitive to initialization. Discretization is another approach which is less sensitive to random initialization." - }, - { - "name": "degree", - "type": "float", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel. Ignored by other kernels." 
- }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Zero coefficient for polynomial and sigmoid kernels. Ignored by other kernels." - }, - { - "name": "kernel_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters (keyword arguments) and values for kernel passed as callable object. Ignored by other kernels." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run when `affinity='nearest_neighbors'` or `affinity='precomputed_nearest_neighbors'`. The neighbors search will be done in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbosity mode. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster, or similarities / affinities between instances if ``affinity='precomputed'``. If a sparse matrix is provided in a format other than ``csr_matrix``, ``csc_matrix``, or ``coo_matrix``, it will be converted into a sparse ``csr_matrix``." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Perform spectral clustering from features, or affinity matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. If a sparse matrix is\n provided in a format other than ``csr_matrix``, ``csc_matrix``,\n or ``coo_matrix``, it will be converted into a sparse\n ``csr_matrix``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself" - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training instances to cluster, or similarities / affinities between instances if ``affinity='precomputed'``. If a sparse matrix is provided in a format other than ``csr_matrix``, ``csc_matrix``, or ``coo_matrix``, it will be converted into a sparse ``csr_matrix``." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present here for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform spectral clustering from features, or affinity matrix,\nand return cluster labels.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. 
If a sparse matrix is\n provided in a format other than ``csr_matrix``, ``csc_matrix``,\n or ``coo_matrix``, it will be converted into a sparse\n ``csr_matrix``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster. For instance when clusters are\nnested circles on the 2D plane.\n\nIf affinity is the adjacency matrix of a graph, this method can be\nused to find normalized graph cuts.\n\nWhen calling ``fit``, an affinity matrix is constructed using either\nkernel function such the Gaussian (aka RBF) kernel of the euclidean\ndistanced ``d(X, X)``::\n\n np.exp(-gamma * d(X,X) ** 2)\n\nor a k-nearest neighbors connectivity matrix.\n\nAlternatively, using ``precomputed``, a user-provided affinity\nmatrix can be used.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int, default=8\n The dimension of the projection subspace.\n\neigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems,\n but may also lead to instabilities. 
If None, then ``'arpack'`` is\n used.\n\nn_components : int, default=n_clusters\n Number of eigen vectors to use for the spectral embedding\n\nrandom_state : int, RandomState instance, default=None\n A pseudo random number generator used for the initialization of the\n lobpcg eigen vectors decomposition when ``eigen_solver='amg'`` and by\n the K-Means initialization. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\nn_init : int, default=10\n Number of time the k-means algorithm will be run with different\n centroid seeds. The final results will be the best output of\n n_init consecutive runs in terms of inertia.\n\ngamma : float, default=1.0\n Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels.\n Ignored for ``affinity='nearest_neighbors'``.\n\naffinity : str or callable, default='rbf'\n How to construct the affinity matrix.\n - 'nearest_neighbors' : construct the affinity matrix by computing a\n graph of nearest neighbors.\n - 'rbf' : construct the affinity matrix using a radial basis function\n (RBF) kernel.\n - 'precomputed' : interpret ``X`` as a precomputed affinity matrix.\n - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph\n of precomputed nearest neighbors, and constructs the affinity matrix\n by selecting the ``n_neighbors`` nearest neighbors.\n - one of the kernels supported by\n :func:`~sklearn.metrics.pairwise_kernels`.\n\n Only kernels that produce similarity scores (non-negative values that\n increase with similarity) should be used. This property is not checked\n by the clustering algorithm.\n\nn_neighbors : int, default=10\n Number of neighbors to use when constructing the affinity matrix using\n the nearest neighbors method. 
Ignored for ``affinity='rbf'``.\n\neigen_tol : float, default=0.0\n Stopping criterion for eigendecomposition of the Laplacian matrix\n when ``eigen_solver='arpack'``.\n\nassign_labels : {'kmeans', 'discretize'}, default='kmeans'\n The strategy to use to assign labels in the embedding\n space. There are two ways to assign labels after the laplacian\n embedding. k-means can be applied and is a popular choice. But it can\n also be sensitive to initialization. Discretization is another approach\n which is less sensitive to random initialization.\n\ndegree : float, default=3\n Degree of the polynomial kernel. Ignored by other kernels.\n\ncoef0 : float, default=1\n Zero coefficient for polynomial and sigmoid kernels.\n Ignored by other kernels.\n\nkernel_params : dict of str to any, default=None\n Parameters (keyword arguments) and values for kernel passed as\n callable object. Ignored by other kernels.\n\nn_jobs : int, default=None\n The number of parallel jobs to run when `affinity='nearest_neighbors'`\n or `affinity='precomputed_nearest_neighbors'`. The neighbors search\n will be done in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool, default=False\n Verbosity mode.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\naffinity_matrix_ : array-like of shape (n_samples, n_samples)\n Affinity matrix used for clustering. Available only if after calling\n ``fit``.\n\nlabels_ : ndarray of shape (n_samples,)\n Labels of each point\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralClustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n... [4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralClustering(n_clusters=2,\n... assign_labels=\"discretize\",\n... 
random_state=0).fit(X)\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])\n>>> clustering\nSpectralClustering(assign_labels='discretize', n_clusters=2,\n random_state=0)\n\nNotes\n-----\nIf you have an affinity matrix, such as a distance matrix,\nfor which 0 means identical elements, and high values means\nvery dissimilar elements, it can be transformed in a\nsimilarity matrix that is well suited for the algorithm by\napplying the Gaussian (RBF, heat) kernel::\n\n np.exp(- dist_matrix ** 2 / (2. * delta ** 2))\n\nWhere ``delta`` is a free parameter representing the width of the Gaussian\nkernel.\n\nAnother alternative is to take a symmetric version of the k\nnearest neighbors connectivity matrix of the points.\n\nIf the pyamg package is installed, it is used: this greatly\nspeeds up computation.\n\nReferences\n----------\n\n- Normalized cuts and image segmentation, 2000\n Jianbo Shi, Jitendra Malik\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324\n\n- A Tutorial on Spectral Clustering, 2007\n Ulrike von Luxburg\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323\n\n- Multiclass spectral clustering, 2003\n Stella X. Yu, Jianbo Shi\n https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf" - } - ], - "functions": [ - { - "name": "discretize", - "decorators": [], - "parameters": [ - { - "name": "vectors", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The embedding space of the samples." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy vectors, or perform in-place normalization." 
- }, - { - "name": "max_svd_restarts", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of attempts to restart SVD if convergence fails" - }, - { - "name": "n_iter_max", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to attempt in rotation and partition matrix search if machine precision convergence is not reached" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for rotation matrix initialization. Use an int to make the randomness deterministic. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Search for a partition matrix (clustering) which is closest to the\neigenvector embedding.\n\nParameters\n----------\nvectors : array-like of shape (n_samples, n_clusters)\n The embedding space of the samples.\n\ncopy : bool, default=True\n Whether to copy vectors, or perform in-place normalization.\n\nmax_svd_restarts : int, default=30\n Maximum number of attempts to restart SVD if convergence fails\n\nn_iter_max : int, default=30\n Maximum number of iterations to attempt in rotation and partition\n matrix search if machine precision convergence is not reached\n\nrandom_state : int, RandomState instance, default=None\n Determines random number generation for rotation matrix initialization.\n Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\nReturns\n-------\nlabels : array of integers, shape: n_samples\n The labels of the clusters.\n\nReferences\n----------\n\n- Multiclass spectral clustering, 2003\n Stella X. 
Yu, Jianbo Shi\n https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf\n\nNotes\n-----\n\nThe eigenvector embedding is used to iteratively search for the\nclosest discrete partition. First, the eigenvector embedding is\nnormalized to the space of partition matrices. An optimal discrete\npartition matrix closest to this normalized embedding multiplied by\nan initial rotation is calculated. Fixing this discrete partition\nmatrix, an optimal rotation matrix is calculated. These two\ncalculations are performed until convergence. The discrete partition\nmatrix is returned as the clustering solution. Used in spectral\nclustering, this method tends to be faster and more robust to random\ninitialization than k-means." - }, - { - "name": "spectral_clustering", - "decorators": [], - "parameters": [ - { - "name": "affinity", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The affinity matrix describing the relationship of the samples to embed. **Must be symmetric**. Possible examples: - adjacency matrix of a graph, - heat kernel of the pairwise distance matrix of the samples, - symmetric k-nearest neighbours connectivity matrix of the samples." - }, - { - "name": "n_clusters", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of clusters to extract." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "n_clusters", - "limitation": null, - "ignored": false, - "docstring": "Number of eigen vectors to use for the spectral embedding" - }, - { - "name": "eigen_solver", - "type": "Optional[Literal['arpack', 'lobpcg']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The eigenvalue decomposition strategy to use. AMG requires pyamg to be installed. 
It can be faster on very large, sparse problems, but may also lead to instabilities. If None, then ``'arpack'`` is used." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A pseudo random number generator used for the initialization of the lobpcg eigen vectors decomposition when eigen_solver == 'amg' and by the K-Means initialization. Use an int to make the randomness deterministic. See :term:`Glossary `." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia." - }, - { - "name": "eigen_tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion for eigendecomposition of the Laplacian matrix when using arpack eigen_solver." - }, - { - "name": "assign_labels", - "type": "Literal['kmeans', 'discretize']", - "hasDefault": true, - "default": "'kmeans'", - "limitation": null, - "ignored": false, - "docstring": "The strategy to use to assign labels in the embedding space. There are two ways to assign labels after the laplacian embedding. k-means can be applied and is a popular choice. But it can also be sensitive to initialization. Discretization is another approach which is less sensitive to random initialization. See the 'Multiclass spectral clustering' paper referenced below for more details on the discretization approach." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbosity mode. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster. For instance, when clusters are\nnested circles on the 2D plane.\n\nIf affinity is the adjacency matrix of a graph, this method can be\nused to find normalized graph cuts.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\naffinity : {array-like, sparse matrix} of shape (n_samples, n_samples)\n The affinity matrix describing the relationship of the samples to\n embed. **Must be symmetric**.\n\n Possible examples:\n - adjacency matrix of a graph,\n - heat kernel of the pairwise distance matrix of the samples,\n - symmetric k-nearest neighbours connectivity matrix of the samples.\n\nn_clusters : int, default=None\n Number of clusters to extract.\n\nn_components : int, default=n_clusters\n Number of eigen vectors to use for the spectral embedding\n\neigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems,\n but may also lead to instabilities. If None, then ``'arpack'`` is\n used.\n\nrandom_state : int, RandomState instance, default=None\n A pseudo random number generator used for the initialization of the\n lobpcg eigen vectors decomposition when eigen_solver == 'amg' and by\n the K-Means initialization. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\nn_init : int, default=10\n Number of time the k-means algorithm will be run with different\n centroid seeds. 
The final results will be the best output of\n n_init consecutive runs in terms of inertia.\n\neigen_tol : float, default=0.0\n Stopping criterion for eigendecomposition of the Laplacian matrix\n when using arpack eigen_solver.\n\nassign_labels : {'kmeans', 'discretize'}, default='kmeans'\n The strategy to use to assign labels in the embedding\n space. There are two ways to assign labels after the laplacian\n embedding. k-means can be applied and is a popular choice. But it can\n also be sensitive to initialization. Discretization is another\n approach which is less sensitive to random initialization. See\n the 'Multiclass spectral clustering' paper referenced below for\n more details on the discretization approach.\n\nverbose : bool, default=False\n Verbosity mode.\n\n .. versionadded:: 0.24\n\nReturns\n-------\nlabels : array of integers, shape: n_samples\n The labels of the clusters.\n\nReferences\n----------\n\n- Normalized cuts and image segmentation, 2000\n Jianbo Shi, Jitendra Malik\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324\n\n- A Tutorial on Spectral Clustering, 2007\n Ulrike von Luxburg\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323\n\n- Multiclass spectral clustering, 2003\n Stella X. Yu, Jianbo Shi\n https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf\n\nNotes\n-----\nThe graph should contain only one connect component, elsewhere\nthe results make little sense.\n\nThis algorithm solves the normalized cut for k=2: it is a\nnormalized spectral clustering." 
- } - ] - }, - { - "name": "sklearn.cluster", - "imports": [ - "from _spectral import spectral_clustering", - "from _spectral import SpectralClustering", - "from _mean_shift import mean_shift", - "from _mean_shift import MeanShift", - "from _mean_shift import estimate_bandwidth", - "from _mean_shift import get_bin_seeds", - "from _affinity_propagation import affinity_propagation", - "from _affinity_propagation import AffinityPropagation", - "from _agglomerative import ward_tree", - "from _agglomerative import AgglomerativeClustering", - "from _agglomerative import linkage_tree", - "from _agglomerative import FeatureAgglomeration", - "from _kmeans import k_means", - "from _kmeans import KMeans", - "from _kmeans import MiniBatchKMeans", - "from _kmeans import kmeans_plusplus", - "from _dbscan import dbscan", - "from _dbscan import DBSCAN", - "from _optics import OPTICS", - "from _optics import cluster_optics_dbscan", - "from _optics import compute_optics_graph", - "from _optics import cluster_optics_xi", - "from _bicluster import SpectralBiclustering", - "from _bicluster import SpectralCoclustering", - "from _birch import Birch" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.cluster.tests.common", - "imports": [ - "import numpy as np" - ], - "classes": [], - "functions": [ - { - "name": "generate_clustered_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_affinity_propagation", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy.sparse import csr_matrix", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.cluster import AffinityPropagation", - "from 
sklearn.cluster._affinity_propagation import _equal_similarities_and_preferences", - "from sklearn.cluster import affinity_propagation", - "from sklearn.datasets import make_blobs", - "from sklearn.metrics import euclidean_distances" - ], - "classes": [], - "functions": [ - { - "name": "test_affinity_propagation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_predict_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_fit_non_convergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_equal_mutual_similarities", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_predict_non_convergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_non_convergence_regressiontest", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_equal_similarities_and_preferences", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_random_state", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_random_state_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_affinity_propagation_convergence_warning_dense_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Non-regression, see #13334" - }, - { - "name": "test_affinity_propagation_float32", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_propagation_pairwise_is_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_bicluster", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import issparse", - "from sklearn.model_selection import ParameterGrid", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.base import BaseEstimator", - "from sklearn.base import BiclusterMixin", - "from sklearn.cluster import SpectralCoclustering", - "from sklearn.cluster import SpectralBiclustering", - "from sklearn.cluster._bicluster import _scale_normalize", - "from sklearn.cluster._bicluster import _bistochastic_normalize", - "from sklearn.cluster._bicluster import _log_normalize", - "from sklearn.metrics import consensus_score", - "from sklearn.metrics import v_measure_score", - "from sklearn.datasets import make_biclusters", - "from sklearn.datasets import make_checkerboard" - ], - "classes": [ - { - "name": "MockBiclustering", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } 
- ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_get_submatrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_shape_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_coclustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_biclustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_do_scale_test", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that rows sum to one constant, and columns to another." - }, - { - "name": "_do_bistochastic_test", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that rows and columns sum to the same constant." 
- }, - { - "name": "test_scale_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bistochastic_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_log_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_best_piecewise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_project_and_cluster", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_perfect_checkerboard", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_jobs_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_birch", - "imports": [ - "from scipy import sparse", - "import numpy as np", - "import pytest", - "from sklearn.cluster.tests.common import generate_clustered_data", - "from sklearn.cluster import Birch", - "from sklearn.cluster import AgglomerativeClustering", - "from sklearn.datasets import make_blobs", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.linear_model import ElasticNet", - "from 
sklearn.metrics import pairwise_distances_argmin", - "from sklearn.metrics import v_measure_score", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns" - ], - "classes": [], - "functions": [ - { - "name": "test_n_samples_leaves_roots", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_birch_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_clusters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_second_call_error_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_branching_factor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_branching_factor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Use the leaf linked list for traversal" - }, - { - "name": "test_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_birch_n_clusters_long_int", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_dbscan", - "imports": [ - "import pickle", - "import numpy as np", - "import warnings", - "from scipy.spatial import distance", - "from scipy import sparse", - "import pytest", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.neighbors import NearestNeighbors", - "from sklearn.cluster import DBSCAN", - "from sklearn.cluster import dbscan", - "from sklearn.cluster.tests.common import generate_clustered_data", - "from sklearn.metrics.pairwise import pairwise_distances" - ], - "classes": [], - "functions": [ - { - "name": "test_dbscan_similarity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_sparse_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_sparse_precomputed_different_eps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_input_not_modified", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_no_core_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_metric_params", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_balltree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_badargs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_boundaries", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weighted_dbscan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_core_samples_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_precomputed_metric_with_degenerate_input_arrays", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_precomputed_metric_with_initial_rows_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_feature_agglomeration", - "imports": [ - "import numpy as np", - "from sklearn.cluster import FeatureAgglomeration", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import assert_array_almost_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_feature_agglomeration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_hierarchical", - "imports": [ - "from tempfile import mkdtemp", - "import shutil", - "import pytest", - "from functools import partial", - "import numpy as np", - "from scipy import sparse", - "from scipy.cluster import hierarchy", - "from sklearn.metrics.cluster import adjusted_rand_score", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.cluster import ward_tree", - "from sklearn.cluster import AgglomerativeClustering", - "from sklearn.cluster import FeatureAgglomeration", - "from sklearn.cluster._agglomerative import _hc_cut", - "from sklearn.cluster._agglomerative import _TREE_BUILDERS", - "from sklearn.cluster._agglomerative import linkage_tree", - "from sklearn.cluster._agglomerative import _fix_connectivity", - "from sklearn.feature_extraction.image import grid_to_graph", - "from sklearn.metrics.pairwise import PAIRED_DISTANCES", - "from sklearn.metrics.pairwise import cosine_distances", - "from sklearn.metrics.pairwise import manhattan_distances", - "from sklearn.metrics.pairwise import pairwise_distances", - "from sklearn.metrics.cluster import normalized_mutual_info_score", - "from sklearn.neighbors import kneighbors_graph", - "from sklearn.cluster._hierarchical_fast import average_merge", - "from sklearn.cluster._hierarchical_fast import max_merge", - "from sklearn.utils._fast_dict import IntFloatDict", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.datasets import make_moons", - "from sklearn.datasets import make_circles" - ], - "classes": [], - "functions": [ - { - "name": "test_linkage_misc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_structured_linkage_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unstructured_linkage_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_height_linkage_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_agglomerative_clustering_wrong_arg_memory", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_cosine_linkage_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_agglomerative_clustering_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_agglomerative_clustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ward_agglomeration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_single_linkage_clustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assess_same_labelling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Util for comparison with scipy" - }, - { - "name": "test_sparse_scikit_vs_scipy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vector_scikit_single_vs_scipy_single", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", 
- "docstring": null - }, - { - "name": "test_identical_points", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_connectivity_propagation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ward_tree_children_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ward_linkage_tree_return_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_connectivity_fixing_non_lil", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_int_float_dict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_connectivity_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_connectivity_ignores_diagonal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_full_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_agg_n_clusters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinity_passed_to_fix_connectivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_agglomerative_clustering_with_distance_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_small_distance_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cluster_distances_with_distance_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_agglomerative_clustering_with_distance_threshold_edge_case", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dist_threshold_invalid_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_shape_precomputed_dist_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_k_means", - "imports": [ - "import re", - "import sys", - "import numpy as np", - "from scipy import sparse as sp", - "from threadpoolctl import threadpool_limits", - "import pytest", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils.fixes import _astype_copy_false", - "from sklearn.base import clone", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.utils.extmath import row_norms", - "from sklearn.metrics import pairwise_distances", - "from sklearn.metrics import pairwise_distances_argmin", - "from sklearn.metrics.cluster import v_measure_score", - "from sklearn.cluster import KMeans", - "from sklearn.cluster import k_means", - "from sklearn.cluster import 
kmeans_plusplus", - "from sklearn.cluster import MiniBatchKMeans", - "from sklearn.cluster._kmeans import _labels_inertia", - "from sklearn.cluster._kmeans import _mini_batch_step", - "from sklearn.cluster._k_means_fast import _relocate_empty_clusters_dense", - "from sklearn.cluster._k_means_fast import _relocate_empty_clusters_sparse", - "from sklearn.cluster._k_means_fast import _euclidean_dense_dense_wrapper", - "from sklearn.cluster._k_means_fast import _euclidean_sparse_dense_wrapper", - "from sklearn.cluster._k_means_fast import _inertia_dense", - "from sklearn.cluster._k_means_fast import _inertia_sparse", - "from sklearn.datasets import make_blobs", - "from io import StringIO" - ], - "classes": [], - "functions": [ - { - "name": "test_kmeans_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_relocated_clusters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_relocate_empty_clusters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_elkan_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_convergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_update_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_fitted_model", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_all_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_minibatch_kmeans_partial_fit_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fortran_aligned_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_k_means_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_kmeans_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_kmeans_warning_init_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warning_n_init_precomputed_centers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_sensible_reassign", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_reassign", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_with_many_reassignments", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_kmeans_init_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_copyx", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_max_iter", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_dense_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_integer_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_k_means_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_float_precision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_centers_not_mutated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_init_fitted_centers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_warns_less_centers_than_unique_points", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sort_centers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weighted_vs_repeated", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unit_weights_vs_no_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaled_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_elkan_iter_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_empty_cluster_relocated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_result_of_kmeans_equal_in_diff_n_threads", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precompute_distance_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_jobs_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_kmeans_deprecated_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warning_elkan_1_cluster", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_k_means_1_iteration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_euclidean_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inertia", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_sample_weight_unchanged", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_wrong_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_kmeans_wrong_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_plusplus_wrong_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_plusplus_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_plusplus_norms", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kmeans_plusplus_dataorder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_mean_shift", - "imports": [ - "import numpy as np", - "import warnings", - "import pytest", - "from scipy import sparse", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.cluster import MeanShift", - "from sklearn.cluster import mean_shift", - "from sklearn.cluster import estimate_bandwidth", - "from sklearn.cluster import get_bin_seeds", - "from sklearn.datasets import make_blobs", - "from sklearn.metrics import v_measure_score" - ], - 
"classes": [], - "functions": [ - { - "name": "test_estimate_bandwidth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimate_bandwidth_1sample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_shift", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_shift_negative_bandwidth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimate_bandwidth_with_sparse_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_meanshift_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_meanshift_all_orphans", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unfitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cluster_intensity_tie", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bin_seeds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_shift_zero_bandwidth", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_optics", - "imports": [ - "import platform", - "import sys", - "import numpy as np", - "import pytest", - "from sklearn.datasets import make_blobs", - "from sklearn.cluster import OPTICS", - "from sklearn.cluster._optics import _extend_region", - "from sklearn.cluster._optics import _extract_xi_labels", - "from sklearn.metrics.cluster import contingency_matrix", - "from sklearn.metrics.pairwise import pairwise_distances", - "from sklearn.cluster import DBSCAN", - "from sklearn.utils import shuffle", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils.fixes import sp_version", - "from sklearn.utils.fixes import parse_version", - "from sklearn.cluster.tests.common import generate_clustered_data", - "from sklearn.utils import _IS_32BIT" - ], - "classes": [], - "functions": [ - { - "name": "test_extend_downward", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extend_upward", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_the_extract_xi_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_xi", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cluster_hierarchy_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_correct_number_of_clusters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_minimum_number_of_sample_check", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bad_extract", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bad_reachability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_close_extract", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan_optics_parity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_samples_edge_case", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_cluster_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_cluster_size_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_cluster_size_invalid2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_processing_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compare_to_ELKI", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_cluster_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_dbscan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_precomputed_dists", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests.test_spectral", - "imports": [ - "import re", - "import numpy as np", - "from scipy import sparse", - "import pytest", - "import pickle", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.cluster import SpectralClustering", - "from sklearn.cluster import spectral_clustering", - "from sklearn.cluster._spectral import discretize", - "from sklearn.feature_extraction import img_to_graph", - "from sklearn.metrics import pairwise_distances", - "from sklearn.metrics import adjusted_rand_score", - "from sklearn.metrics.pairwise import kernel_metrics", - "from sklearn.metrics.pairwise import rbf_kernel", - "from sklearn.neighbors import NearestNeighbors", - "from sklearn.datasets import make_blobs", - "from pyamg import smoothed_aggregation_solver" - ], - "classes": [], - "functions": [ - { - "name": "test_spectral_clustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_unknown_mode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_unknown_assign_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_clustering_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precomputed_nearest_neighbors_filtering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_affinities", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_clustering_with_arpack_amg_solvers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_is_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cluster.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.compose._column_transformer", - "imports": [ - "from itertools import chain", - "import numpy as np", - "from scipy import sparse", - "from joblib import Parallel", - "from base import clone", - "from base import TransformerMixin", - "from utils._estimator_html_repr import _VisualBlock", - "from pipeline import _fit_transform_one", - "from pipeline import _transform_one", - "from pipeline import _name_estimators", - "from preprocessing import FunctionTransformer", - "from utils import Bunch", - "from utils import _safe_indexing", - "from utils import _get_column_indices", - "from utils import _determine_key_type", - "from utils.metaestimators import _BaseComposition", - "from utils.validation import check_array", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed" - ], - "classes": [ - { - "name": "ColumnTransformer", - "decorators": [], - "superclasses": [], - "methods": [ 
- { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "transformers", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of (name, transformer, columns) tuples specifying the transformer objects to be applied to subsets of the data. name : str Like in Pipeline and FeatureUnion, this allows the transformer and its parameters to be set using ``set_params`` and searched in grid search. transformer : {'drop', 'passthrough'} or estimator Estimator must support :term:`fit` and :term:`transform`. Special-cased strings 'drop' and 'passthrough' are accepted as well, to indicate to drop the columns or to pass them through untransformed, respectively. columns : str, array-like of str, int, array-like of int, array-like of bool, slice or callable Indexes the data on its second axis. Integers are interpreted as positional columns, while strings can reference DataFrame columns by name. A scalar string or int should be used where ``transformer`` expects X to be a 1d array-like (vector), otherwise a 2d array will be passed to the transformer. A callable is passed the input data `X` and can return any of the above. To select multiple columns by name or dtype, you can use :obj:`make_column_selector`." - }, - { - "name": "remainder", - "type": "Literal['drop', 'passthrough']", - "hasDefault": true, - "default": "'drop'", - "limitation": null, - "ignored": false, - "docstring": "By default, only the specified columns in `transformers` are transformed and combined in the output, and the non-specified columns are dropped. (default of ``'drop'``). By specifying ``remainder='passthrough'``, all remaining columns that were not specified in `transformers` will be automatically passed through. This subset of columns is concatenated with the output of the transformers. By setting ``remainder`` to be an estimator, the remaining non-specified columns will use the ``remainder`` estimator. 
The estimator must support :term:`fit` and :term:`transform`. Note that using this feature requires that the DataFrame columns input at :term:`fit` and :term:`transform` have identical order." - }, - { - "name": "sparse_threshold", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "If the output of the different transformers contains sparse matrices, these will be stacked as a sparse matrix if the overall density is lower than this value. Use ``sparse_threshold=0`` to always return dense. When the transformed output consists of all dense data, the stacked result will be dense, and this keyword will be ignored." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "transformer_weights", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multiplicative weights for features per transformer. The output of the transformer is multiplied by these weights. Keys are transformer names, values the weights." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the time elapsed while fitting each transformer will be printed as it is completed." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_transformers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get parameters for this estimator.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `transformers` of the\n`ColumnTransformer`.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values." - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. Note that you\ncan directly set the parameters of the estimators contained in\n`transformers` of `ColumnTransformer`.\n\nReturns\n-------\nself" - }, - { - "name": "_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate (name, trans, column, weight) tuples.\n\nIf fitted=True, use the fitted transformers, else use the\nuser specified transformers updated with converted column names\nand potentially appended with transformer for remainder." 
- }, - { - "name": "_validate_transformers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_column_callables", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Converts callable column specifications." - }, - { - "name": "_validate_remainder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validates ``remainder`` and defines ``_remainder`` targeting\nthe remaining columns." - }, - { - "name": "named_transformers_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Access the fitted transformer by name.\n\nRead-only attribute to access any transformer by given name.\nKeys are transformer names and values are the fitted transformer\nobjects." - }, - { - "name": "get_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get feature names from all transformers.\n\nReturns\n-------\nfeature_names : list of strings\n Names of the features produced by transform." - }, - { - "name": "_update_fitted_transformers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ensure that the output of each transformer is 2D. Otherwise\nhstack can raise an error or produce incorrect results." 
- }, - { - "name": "_log_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function to fit and/or transform on demand.\n\nReturn value (transformers and/or transformed X data) depends\non the passed function.\n``fitted=True`` ensures the fitted transformers are used." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, of which specified subsets are used to fit the transformers." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets for supervised learning." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit all transformers using X.\n\nParameters\n----------\nX : {array-like, dataframe} of shape (n_samples, n_features)\n Input data, of which specified subsets are used to fit the\n transformers.\n\ny : array-like of shape (n_samples,...), default=None\n Targets for supervised learning.\n\nReturns\n-------\nself : ColumnTransformer\n This estimator" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, of which specified subsets are used to fit the transformers." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets for supervised learning." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit all transformers, transform the data and concatenate results.\n\nParameters\n----------\nX : {array-like, dataframe} of shape (n_samples, n_features)\n Input data, of which specified subsets are used to fit the\n transformers.\n\ny : array-like of shape (n_samples,), default=None\n Targets for supervised learning.\n\nReturns\n-------\nX_t : {array-like, sparse matrix} of shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers. If\n any result is a sparse matrix, everything will be converted to\n sparse matrices." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to be transformed by subset." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X separately by each transformer, concatenate results.\n\nParameters\n----------\nX : {array-like, dataframe} of shape (n_samples, n_features)\n The data to be transformed by subset.\n\nReturns\n-------\nX_t : {array-like, sparse matrix} of shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers. If\n any result is a sparse matrix, everything will be converted to\n sparse matrices." 
- }, - { - "name": "_hstack", - "decorators": [], - "parameters": [ - { - "name": "Xs", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Stacks Xs horizontally.\n\nThis allows subclasses to control the stacking behavior, while reusing\neverything else from ColumnTransformer.\n\nParameters\n----------\nXs : list of {array-like, sparse matrix, dataframe}" - }, - { - "name": "_sk_visual_block_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Applies transformers to columns of an array or pandas DataFrame.\n\nThis estimator allows different columns or column subsets of the input\nto be transformed separately and the features generated by each transformer\nwill be concatenated to form a single feature space.\nThis is useful for heterogeneous or columnar data, to combine several\nfeature extraction mechanisms or transformations into a single transformer.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\ntransformers : list of tuples\n List of (name, transformer, columns) tuples specifying the\n transformer objects to be applied to subsets of the data.\n\n name : str\n Like in Pipeline and FeatureUnion, this allows the transformer and\n its parameters to be set using ``set_params`` and searched in grid\n search.\n transformer : {'drop', 'passthrough'} or estimator\n Estimator must support :term:`fit` and :term:`transform`.\n Special-cased strings 'drop' and 'passthrough' are accepted as\n well, to indicate to drop the columns or to pass them through\n untransformed, respectively.\n columns : str, array-like of str, int, array-like of int, array-like of bool, slice or callable\n Indexes the data on its second axis. 
Integers are interpreted as\n positional columns, while strings can reference DataFrame columns\n by name. A scalar string or int should be used where\n ``transformer`` expects X to be a 1d array-like (vector),\n otherwise a 2d array will be passed to the transformer.\n A callable is passed the input data `X` and can return any of the\n above. To select multiple columns by name or dtype, you can use\n :obj:`make_column_selector`.\n\nremainder : {'drop', 'passthrough'} or estimator, default='drop'\n By default, only the specified columns in `transformers` are\n transformed and combined in the output, and the non-specified\n columns are dropped. (default of ``'drop'``).\n By specifying ``remainder='passthrough'``, all remaining columns that\n were not specified in `transformers` will be automatically passed\n through. This subset of columns is concatenated with the output of\n the transformers.\n By setting ``remainder`` to be an estimator, the remaining\n non-specified columns will use the ``remainder`` estimator. The\n estimator must support :term:`fit` and :term:`transform`.\n Note that using this feature requires that the DataFrame columns\n input at :term:`fit` and :term:`transform` have identical order.\n\nsparse_threshold : float, default=0.3\n If the output of the different transformers contains sparse matrices,\n these will be stacked as a sparse matrix if the overall density is\n lower than this value. Use ``sparse_threshold=0`` to always return\n dense. When the transformed output consists of all dense data, the\n stacked result will be dense, and this keyword will be ignored.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\ntransformer_weights : dict, default=None\n Multiplicative weights for features per transformer. The output of the\n transformer is multiplied by these weights. 
Keys are transformer names,\n values the weights.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\nAttributes\n----------\ntransformers_ : list\n The collection of fitted transformers as tuples of\n (name, fitted_transformer, column). `fitted_transformer` can be an\n estimator, 'drop', or 'passthrough'. In case there were no columns\n selected, this will be the unfitted transformer.\n If there are remaining columns, the final element is a tuple of the\n form:\n ('remainder', transformer, remaining_columns) corresponding to the\n ``remainder`` parameter. If there are remaining columns, then\n ``len(transformers_)==len(transformers)+1``, otherwise\n ``len(transformers_)==len(transformers)``.\n\nnamed_transformers_ : :class:`~sklearn.utils.Bunch`\n Read-only attribute to access any transformer by given name.\n Keys are transformer names and values are the fitted transformer\n objects.\n\nsparse_output_ : bool\n Boolean flag indicating whether the output of ``transform`` is a\n sparse matrix or a dense numpy array, which depends on the output\n of the individual transformers and the `sparse_threshold` keyword.\n\nNotes\n-----\nThe order of the columns in the transformed feature matrix follows the\norder of how the columns are specified in the `transformers` list.\nColumns of the original feature matrix that are not specified are\ndropped from the resulting transformed feature matrix, unless specified\nin the `passthrough` keyword. 
Those columns specified with `passthrough`\nare added at the right to the output of the transformers.\n\nSee Also\n--------\nmake_column_transformer : Convenience function for\n combining the outputs of multiple transformer objects applied to\n column subsets of the original feature space.\nmake_column_selector : Convenience function for selecting\n columns based on datatype or the columns name with a regex pattern.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.compose import ColumnTransformer\n>>> from sklearn.preprocessing import Normalizer\n>>> ct = ColumnTransformer(\n... [(\"norm1\", Normalizer(norm='l1'), [0, 1]),\n... (\"norm2\", Normalizer(norm='l1'), slice(2, 4))])\n>>> X = np.array([[0., 1., 2., 2.],\n... [1., 1., 0., 1.]])\n>>> # Normalizer scales each row of X to unit norm. A separate scaling\n>>> # is applied for the two first and two last elements of each\n>>> # row independently.\n>>> ct.fit_transform(X)\narray([[0. , 1. , 0.5, 0.5],\n [0.5, 0.5, 0. , 1. ]])" - }, - { - "name": "make_column_selector", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "pattern", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of columns containing this regex pattern will be included. If None, column selection will not be selected based on pattern." - }, - { - "name": "dtype_include", - "type": "List", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A selection of dtypes to include. For more details, see :meth:`pandas.DataFrame.select_dtypes`." - }, - { - "name": "dtype_exclude", - "type": "List", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A selection of dtypes to exclude. For more details, see :meth:`pandas.DataFrame.select_dtypes`." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Create a callable to select columns to be used with\n:class:`ColumnTransformer`.\n\n:func:`make_column_selector` can select columns based on datatype or the\ncolumns name with a regex. When using multiple selection criteria, **all**\ncriteria must match for a column to be selected.\n\nParameters\n----------\npattern : str, default=None\n Name of columns containing this regex pattern will be included. If\n None, column selection will not be selected based on pattern.\n\ndtype_include : column dtype or list of column dtypes, default=None\n A selection of dtypes to include. For more details, see\n :meth:`pandas.DataFrame.select_dtypes`.\n\ndtype_exclude : column dtype or list of column dtypes, default=None\n A selection of dtypes to exclude. For more details, see\n :meth:`pandas.DataFrame.select_dtypes`.\n\nReturns\n-------\nselector : callable\n Callable for column selection to be used by a\n :class:`ColumnTransformer`.\n\nSee Also\n--------\nColumnTransformer : Class that allows combining the\n outputs of multiple transformer objects used on column subsets\n of the data into a single feature space.\n\nExamples\n--------\n>>> from sklearn.preprocessing import StandardScaler, OneHotEncoder\n>>> from sklearn.compose import make_column_transformer\n>>> from sklearn.compose import make_column_selector\n>>> import pandas as pd # doctest: +SKIP\n>>> X = pd.DataFrame({'city': ['London', 'London', 'Paris', 'Sallisaw'],\n... 'rating': [5, 3, 4, 5]}) # doctest: +SKIP\n>>> ct = make_column_transformer(\n... (StandardScaler(),\n... make_column_selector(dtype_include=np.number)), # rating\n... (OneHotEncoder(),\n... make_column_selector(dtype_include=object))) # city\n>>> ct.fit_transform(X) # doctest: +SKIP\narray([[ 0.90453403, 1. , 0. 
, 0. ],\n [-1.50755672, 1. , 0. , 0. ],\n [-0.30151134, 0. , 1. , 0. ],\n [ 0.90453403, 0. , 0. , 1. ]])" - } - ], - "functions": [ - { - "name": "_check_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Use check_array only on lists and other non-array-likes / sparse" - }, - { - "name": "_is_empty_column_selection", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return True if the column selection is empty (empty list or all-False\nboolean array)." - }, - { - "name": "_get_transformer_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Construct (name, trans, column) tuples from list" - }, - { - "name": "make_column_transformer", - "decorators": [], - "parameters": [ - { - "name": "*transformers", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Tuples of the form (transformer, columns) specifying the transformer objects to be applied to subsets of the data. transformer : {'drop', 'passthrough'} or estimator Estimator must support :term:`fit` and :term:`transform`. Special-cased strings 'drop' and 'passthrough' are accepted as well, to indicate to drop the columns or to pass them through untransformed, respectively. columns : str, array-like of str, int, array-like of int, slice, array-like of bool or callable Indexes the data on its second axis. Integers are interpreted as positional columns, while strings can reference DataFrame columns by name. A scalar string or int should be used where ``transformer`` expects X to be a 1d array-like (vector), otherwise a 2d array will be passed to the transformer. A callable is passed the input data `X` and can return any of the above. To select multiple columns by name or dtype, you can use :obj:`make_column_selector`." 
- }, - { - "name": "remainder", - "type": "Literal['drop', 'passthrough']", - "hasDefault": true, - "default": "'drop'", - "limitation": null, - "ignored": false, - "docstring": "By default, only the specified columns in `transformers` are transformed and combined in the output, and the non-specified columns are dropped. (default of ``'drop'``). By specifying ``remainder='passthrough'``, all remaining columns that were not specified in `transformers` will be automatically passed through. This subset of columns is concatenated with the output of the transformers. By setting ``remainder`` to be an estimator, the remaining non-specified columns will use the ``remainder`` estimator. The estimator must support :term:`fit` and :term:`transform`." - }, - { - "name": "sparse_threshold", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "If the transformed output consists of a mix of sparse and dense data, it will be stacked as a sparse matrix if the density is lower than this value. Use ``sparse_threshold=0`` to always return dense. When the transformed output consists of all sparse or all dense data, the stacked result will be sparse or dense, respectively, and this keyword will be ignored." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the time elapsed while fitting each transformer will be printed as it is completed." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Construct a ColumnTransformer from the given transformers.\n\nThis is a shorthand for the ColumnTransformer constructor; it does not\nrequire, and does not permit, naming the transformers. Instead, they will\nbe given names automatically based on their types. It also does not allow\nweighting with ``transformer_weights``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n*transformers : tuples\n Tuples of the form (transformer, columns) specifying the\n transformer objects to be applied to subsets of the data.\n\n transformer : {'drop', 'passthrough'} or estimator\n Estimator must support :term:`fit` and :term:`transform`.\n Special-cased strings 'drop' and 'passthrough' are accepted as\n well, to indicate to drop the columns or to pass them through\n untransformed, respectively.\n columns : str, array-like of str, int, array-like of int, slice, array-like of bool or callable\n Indexes the data on its second axis. Integers are interpreted as\n positional columns, while strings can reference DataFrame columns\n by name. A scalar string or int should be used where\n ``transformer`` expects X to be a 1d array-like (vector),\n otherwise a 2d array will be passed to the transformer.\n A callable is passed the input data `X` and can return any of the\n above. To select multiple columns by name or dtype, you can use\n :obj:`make_column_selector`.\n\nremainder : {'drop', 'passthrough'} or estimator, default='drop'\n By default, only the specified columns in `transformers` are\n transformed and combined in the output, and the non-specified\n columns are dropped. (default of ``'drop'``).\n By specifying ``remainder='passthrough'``, all remaining columns that\n were not specified in `transformers` will be automatically passed\n through. 
This subset of columns is concatenated with the output of\n the transformers.\n By setting ``remainder`` to be an estimator, the remaining\n non-specified columns will use the ``remainder`` estimator. The\n estimator must support :term:`fit` and :term:`transform`.\n\nsparse_threshold : float, default=0.3\n If the transformed output consists of a mix of sparse and dense data,\n it will be stacked as a sparse matrix if the density is lower than this\n value. Use ``sparse_threshold=0`` to always return dense.\n When the transformed output consists of all sparse or all dense data,\n the stacked result will be sparse or dense, respectively, and this\n keyword will be ignored.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\nReturns\n-------\nct : ColumnTransformer\n\nSee Also\n--------\nColumnTransformer : Class that allows combining the\n outputs of multiple transformer objects used on column subsets\n of the data into a single feature space.\n\nExamples\n--------\n>>> from sklearn.preprocessing import StandardScaler, OneHotEncoder\n>>> from sklearn.compose import make_column_transformer\n>>> make_column_transformer(\n... (StandardScaler(), ['numerical_column']),\n... 
(OneHotEncoder(), ['categorical_column']))\nColumnTransformer(transformers=[('standardscaler', StandardScaler(...),\n ['numerical_column']),\n ('onehotencoder', OneHotEncoder(...),\n ['categorical_column'])])" - } - ] - }, - { - "name": "sklearn.compose._target", - "imports": [ - "import warnings", - "import numpy as np", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from base import clone", - "from utils.validation import check_is_fitted", - "from utils import check_array", - "from utils import _safe_indexing", - "from preprocessing import FunctionTransformer", - "from utils.validation import _deprecate_positional_args", - "from exceptions import NotFittedError", - "from linear_model import LinearRegression" - ], - "classes": [ - { - "name": "TransformedTargetRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "regressor", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regressor object such as derived from ``RegressorMixin``. This regressor will automatically be cloned each time prior to fitting. If regressor is ``None``, ``LinearRegression()`` is created and used." - }, - { - "name": "transformer", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator object such as derived from ``TransformerMixin``. Cannot be set at the same time as ``func`` and ``inverse_func``. If ``transformer`` is ``None`` as well as ``func`` and ``inverse_func``, the transformer will be an identity transformer. Note that the transformer will be cloned during fitting. Also, the transformer is restricting ``y`` to be a numpy array." - }, - { - "name": "func", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Function to apply to ``y`` before passing to ``fit``. 
Cannot be set at the same time as ``transformer``. The function needs to return a 2-dimensional array. If ``func`` is ``None``, the function used will be the identity function." - }, - { - "name": "inverse_func", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Function to apply to the prediction of the regressor. Cannot be set at the same time as ``transformer`` as well. The function needs to return a 2-dimensional array. The inverse function is used to return predictions to the same space of the original training labels." - }, - { - "name": "check_inverse", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to check that ``transform`` followed by ``inverse_transform`` or ``func`` followed by ``inverse_func`` leads to the original targets." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check transformer and fit transformer.\n\nCreate the default transformer, fit it and make additional inverse\ncheck on a subset (optional)." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``fit`` method of the underlying regressor." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\n**fit_params : dict\n Parameters passed to the ``fit`` method of the underlying\n regressor.\n\n\nReturns\n-------\nself : object" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the base regressor, applying inverse.\n\nThe regressor is used to predict and the ``inverse_func`` or\n``inverse_transform`` is applied before returning the prediction.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\ny_hat : ndarray of shape (n_samples,)\n Predicted values." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Meta-estimator to regress on a transformed target.\n\nUseful for applying a non-linear transformation to the target ``y`` in\nregression problems. 
This transformation can be given as a Transformer\nsuch as the QuantileTransformer or as a function and its inverse such as\n``log`` and ``exp``.\n\nThe computation during ``fit`` is::\n\n regressor.fit(X, func(y))\n\nor::\n\n regressor.fit(X, transformer.transform(y))\n\nThe computation during ``predict`` is::\n\n inverse_func(regressor.predict(X))\n\nor::\n\n transformer.inverse_transform(regressor.predict(X))\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nregressor : object, default=None\n Regressor object such as derived from ``RegressorMixin``. This\n regressor will automatically be cloned each time prior to fitting.\n If regressor is ``None``, ``LinearRegression()`` is created and used.\n\ntransformer : object, default=None\n Estimator object such as derived from ``TransformerMixin``. Cannot be\n set at the same time as ``func`` and ``inverse_func``. If\n ``transformer`` is ``None`` as well as ``func`` and ``inverse_func``,\n the transformer will be an identity transformer. Note that the\n transformer will be cloned during fitting. Also, the transformer is\n restricting ``y`` to be a numpy array.\n\nfunc : function, default=None\n Function to apply to ``y`` before passing to ``fit``. Cannot be set at\n the same time as ``transformer``. The function needs to return a\n 2-dimensional array. If ``func`` is ``None``, the function used will be\n the identity function.\n\ninverse_func : function, default=None\n Function to apply to the prediction of the regressor. Cannot be set at\n the same time as ``transformer`` as well. The function needs to return\n a 2-dimensional array. 
The inverse function is used to return\n predictions to the same space of the original training labels.\n\ncheck_inverse : bool, default=True\n Whether to check that ``transform`` followed by ``inverse_transform``\n or ``func`` followed by ``inverse_func`` leads to the original targets.\n\nAttributes\n----------\nregressor_ : object\n Fitted regressor.\n\ntransformer_ : object\n Transformer used in ``fit`` and ``predict``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LinearRegression\n>>> from sklearn.compose import TransformedTargetRegressor\n>>> tt = TransformedTargetRegressor(regressor=LinearRegression(),\n... func=np.log, inverse_func=np.exp)\n>>> X = np.arange(4).reshape(-1, 1)\n>>> y = np.exp(2 * X).ravel()\n>>> tt.fit(X, y)\nTransformedTargetRegressor(...)\n>>> tt.score(X, y)\n1.0\n>>> tt.regressor_.coef_\narray([2.])\n\nNotes\n-----\nInternally, the target ``y`` is always converted into a 2-dimensional array\nto be used by scikit-learn transformers. At the time of prediction, the\noutput will be reshaped to a have the same number of dimensions as ``y``.\n\nSee :ref:`examples/compose/plot_transformed_target.py\n`." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.compose", - "imports": [ - "from _column_transformer import ColumnTransformer", - "from _column_transformer import make_column_transformer", - "from _column_transformer import make_column_selector", - "from _target import TransformedTargetRegressor" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.compose.tests.test_column_transformer", - "imports": [ - "import re", - "import pickle", - "import warnings", - "import numpy as np", - "from scipy import sparse", - "import pytest", - "from numpy.testing import assert_allclose", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.base import BaseEstimator", - "from sklearn.compose import ColumnTransformer", - "from sklearn.compose import make_column_transformer", - "from sklearn.compose import make_column_selector", - "from sklearn.exceptions import NotFittedError", - "from sklearn.preprocessing import FunctionTransformer", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.preprocessing import Normalizer", - "from sklearn.preprocessing import OneHotEncoder", - "from sklearn.feature_extraction import DictVectorizer" - ], - "classes": [ - { - "name": "Trans", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "DoubleTrans", - "decorators": [], - "superclasses": [], - 
"methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "SparseMatrixTrans", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "TransNo2D", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "TransRaise", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_column_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_empty_columns", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_sparse_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_sparse_stacking", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_mixed_cols_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_sparse_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_error_msg_1D", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_2D_transformer_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_2D_transformer_output_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_invalid_columns", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_invalid_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_column_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_column_transformer_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_column_transformer_kwargs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_column_transformer_remainder_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_get_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_named_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_cloning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_get_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_get_feature_names_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_special_strings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_column_transformer_remainder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_remainder_numpy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_remainder_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_remainder_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_no_remaining_remainder_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_drops_all_remainder_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_sparse_remainder_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_drop_all_sparse_remainder_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_get_set_params_with_remainder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_no_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_column_transformer_no_estimators_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_callable_specifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_callable_specifier_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_negative_column_indexes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_reordered_column_names_remainder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Regression test for issue #14223: 'Named col indexing fails with\nColumnTransformer remainder on changing DataFrame column ordering'\n\nShould raise error on changed order combined with remainder.\nShould allow for added columns in `transform` input DataFrame\nas long as all preceding columns match." - }, - { - "name": "test_feature_name_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Tests if the proper warning/error is raised if the columns do not match\nduring fit and transform." 
- }, - { - "name": "test_column_transformer_mask_indexing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_column_selector_with_select_dtypes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_transformer_with_make_column_selector", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_column_selector_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_column_selector_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_names_empty_columns", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sk_visual_block_remainder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sk_visual_block_remainder_drop", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sk_visual_block_remainder_fitted_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sk_visual_block_remainder_fitted_numpy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.compose.tests.test_target", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.base import 
clone", - "from sklearn.base import BaseEstimator", - "from sklearn.base import TransformerMixin", - "from sklearn.dummy import DummyRegressor", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.preprocessing import FunctionTransformer", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.pipeline import Pipeline", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import OrthogonalMatchingPursuit", - "from sklearn import datasets", - "from sklearn.compose import TransformedTargetRegressor" - ], - "classes": [ - { - "name": "DummyCheckerArrayTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "DummyCheckerListRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "DummyTransformer", - "decorators": [], 
- "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy transformer which count how many time fit was called." - }, - { - "name": "DummyRegressorWithExtraFitParams", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_transform_target_regressor_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_invertible", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_standard_scaled", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_shifted_by_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_functions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_transform_target_regressor_functions_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_1d_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_2d_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_2d_transformer_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_multi_to_single", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_ensure_y_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_count_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_pass_fit_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_target_regressor_route_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.compose.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.covariance._elliptic_envelope", - "imports": [ - "import numpy as np", - "from None import MinCovDet", - "from utils.validation import check_is_fitted", - "from utils.validation import check_array", - "from utils.validation import _deprecate_positional_args", - 
"from metrics import accuracy_score", - "from base import OutlierMixin" - ], - "classes": [ - { - "name": "EllipticEnvelope", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "store_precision", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specify if the estimated precision is stored." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the support of robust location and covariance estimates is computed, and a covariance estimate is recomputed from it, without centering the data. Useful to work with data whose mean is significantly equal to zero but is not exactly zero. If False, the robust location and covariance are directly computed with the FastMCD algorithm without additional treatment." - }, - { - "name": "support_fraction", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The proportion of points to be included in the support of the raw MCD estimate. If None, the minimum value of support_fraction will be used within the algorithm: `[n_sample + n_features + 1] / 2`. Range is (0, 1)." - }, - { - "name": "contamination", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The amount of contamination of the data set, i.e. the proportion of outliers in the data set. Range is (0, 0.5)." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the pseudo random number generator for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the EllipticEnvelope model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : Ignored\n Not used, present for API consistency by convention." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the decision function of the given observations.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\ndecision : ndarray of shape (n_samples,)\n Decision function of the samples.\n It is equal to the shifted Mahalanobis distances.\n The threshold for being an outlier is 0, which ensures a\n compatibility with other outlier detection algorithms." - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the negative Mahalanobis distances.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\nnegative_mahal_distances : array-like of shape (n_samples,)\n Opposite of the Mahalanobis distances." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the labels (1 inlier, -1 outlier) of X according to the\nfitted model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and +1 for inliers." - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels for X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the mean accuracy on the given test data and labels.\n\nIn multi-label classification, this is the subset accuracy\nwhich is a harsh metric since you require for each sample that\neach label set be correctly predicted.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Mean accuracy of self.predict(X) w.r.t. y." - } - ], - "docstring": "An object for detecting outliers in a Gaussian distributed dataset.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, the support of robust location and covariance estimates\n is computed, and a covariance estimate is recomputed from it,\n without centering the data.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, the robust location and covariance are directly computed\n with the FastMCD algorithm without additional treatment.\n\nsupport_fraction : float, default=None\n The proportion of points to be included in the support of the raw\n MCD estimate. If None, the minimum value of support_fraction will\n be used within the algorithm: `[n_sample + n_features + 1] / 2`.\n Range is (0, 1).\n\ncontamination : float, default=0.1\n The amount of contamination of the data set, i.e. the proportion\n of outliers in the data set. Range is (0, 0.5).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling\n the data. Pass an int for reproducible results across multiple function\n calls. 
See :term: `Glossary `.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated robust location.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated robust covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nsupport_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute the\n robust estimates of location and shape.\n\noffset_ : float\n Offset used to define the decision function from the raw scores.\n We have the relation: ``decision_function = score_samples - offset_``.\n The offset depends on the contamination parameter and is defined in\n such a way we obtain the expected number of outliers (samples with\n decision function < 0) in training.\n\n .. versionadded:: 0.20\n\nraw_location_ : ndarray of shape (n_features,)\n The raw robust estimated location before correction and re-weighting.\n\nraw_covariance_ : ndarray of shape (n_features, n_features)\n The raw robust estimated covariance before correction and re-weighting.\n\nraw_support_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute\n the raw robust estimates of location and shape, before correction\n and re-weighting.\n\ndist_ : ndarray of shape (n_samples,)\n Mahalanobis distances of the training set (on which :meth:`fit` is\n called) observations.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import EllipticEnvelope\n>>> true_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> X = np.random.RandomState(0).multivariate_normal(mean=[0, 0],\n... cov=true_cov,\n... size=500)\n>>> cov = EllipticEnvelope(random_state=0).fit(X)\n>>> # predict returns 1 for an inlier and -1 for an outlier\n>>> cov.predict([[0, 0],\n... [3, 3]])\narray([ 1, -1])\n>>> cov.covariance_\narray([[0.7411..., 0.2535...],\n [0.2535..., 0.3053...]])\n>>> cov.location_\narray([0.0813... 
, 0.0427...])\n\nSee Also\n--------\nEmpiricalCovariance, MinCovDet\n\nNotes\n-----\nOutlier detection from covariance estimation may break or not\nperform well in high-dimensional settings. In particular, one will\nalways take care to work with ``n_samples > n_features ** 2``.\n\nReferences\n----------\n.. [1] Rousseeuw, P.J., Van Driessen, K. \"A fast algorithm for the\n minimum covariance determinant estimator\" Technometrics 41(3), 212\n (1999)" - } - ], - "functions": [] - }, - { - "name": "sklearn.covariance._empirical_covariance", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy import linalg", - "from base import BaseEstimator", - "from utils import check_array", - "from utils.extmath import fast_logdet", - "from metrics.pairwise import pairwise_distances", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "EmpiricalCovariance", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "store_precision", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specifies if the estimated precision is stored." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data are not centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False (default), data are centered before computation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_covariance", - "decorators": [], - "parameters": [ - { - "name": "covariance", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated covariance matrix to be stored, and from which precision is computed." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Saves the covariance and precision estimates\n\nStorage is done accordingly to `self.store_precision`.\nPrecision stored only if invertible.\n\nParameters\n----------\ncovariance : array-like of shape (n_features, n_features)\n Estimated covariance matrix to be stored, and from which precision\n is computed." - }, - { - "name": "get_precision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Getter for the precision matrix.\n\nReturns\n-------\nprecision_ : array-like of shape (n_features, n_features)\n The precision matrix associated to the current covariance object." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fits the Maximum Likelihood Estimator covariance model\naccording to the given training data and parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object" - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X_test", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data of which we compute the likelihood, where n_samples is the number of samples and n_features is the number of features. X_test is assumed to be drawn from the same distribution than the data used in fit (including centering)." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the log-likelihood of a Gaussian data set with\n`self.covariance_` as an estimator of its covariance matrix.\n\nParameters\n----------\nX_test : array-like of shape (n_samples, n_features)\n Test data of which we compute the likelihood, where n_samples is\n the number of samples and n_features is the number of features.\n X_test is assumed to be drawn from the same distribution than\n the data used in fit (including centering).\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nres : float\n The likelihood of the data set with `self.covariance_` as an\n estimator of its covariance matrix." 
- }, - { - "name": "error_norm", - "decorators": [], - "parameters": [ - { - "name": "comp_cov", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The covariance to compare with." - }, - { - "name": "norm", - "type": "Literal[\"frobenius\", \"spectral\"]", - "hasDefault": true, - "default": "\"frobenius\"", - "limitation": null, - "ignored": false, - "docstring": "The type of norm used to compute the error. Available error types: - 'frobenius' (default): sqrt(tr(A^t.A)) - 'spectral': sqrt(max(eigenvalues(A^t.A)) where A is the error ``(comp_cov - self.covariance_)``." - }, - { - "name": "scaling", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True (default), the squared error norm is divided by n_features. If False, the squared error norm is not rescaled." - }, - { - "name": "squared", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to compute the squared error norm or the error norm. If True (default), the squared error norm is returned. If False, the error norm is returned." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the Mean Squared Error between two covariance estimators.\n(In the sense of the Frobenius norm).\n\nParameters\n----------\ncomp_cov : array-like of shape (n_features, n_features)\n The covariance to compare with.\n\nnorm : {\"frobenius\", \"spectral\"}, default=\"frobenius\"\n The type of norm used to compute the error. 
Available error types:\n - 'frobenius' (default): sqrt(tr(A^t.A))\n - 'spectral': sqrt(max(eigenvalues(A^t.A))\n where A is the error ``(comp_cov - self.covariance_)``.\n\nscaling : bool, default=True\n If True (default), the squared error norm is divided by n_features.\n If False, the squared error norm is not rescaled.\n\nsquared : bool, default=True\n Whether to compute the squared error norm or the error norm.\n If True (default), the squared error norm is returned.\n If False, the error norm is returned.\n\nReturns\n-------\nresult : float\n The Mean Squared Error (in the sense of the Frobenius norm) between\n `self` and `comp_cov` covariance estimators." - }, - { - "name": "mahalanobis", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The observations, the Mahalanobis distances of the which we compute. Observations are assumed to be drawn from the same distribution than the data used in fit." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the squared Mahalanobis distances of given observations.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The observations, the Mahalanobis distances of the which we\n compute. Observations are assumed to be drawn from the same\n distribution than the data used in fit.\n\nReturns\n-------\ndist : ndarray of shape (n_samples,)\n Squared Mahalanobis distances of the observations." 
- } - ], - "docstring": "Maximum likelihood covariance estimator\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specifies if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, data are not centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False (default), data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo-inverse matrix.\n (stored only if store_precision is True)\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import EmpiricalCovariance\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... size=500)\n>>> cov = EmpiricalCovariance().fit(X)\n>>> cov.covariance_\narray([[0.7569..., 0.2818...],\n [0.2818..., 0.3928...]])\n>>> cov.location_\narray([0.0622..., 0.0193...])" - } - ], - "functions": [ - { - "name": "log_likelihood", - "decorators": [], - "parameters": [ - { - "name": "emp_cov", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum Likelihood Estimator of covariance." - }, - { - "name": "precision", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The precision matrix of the covariance model to be tested." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the sample mean of the log_likelihood under a covariance model\n\ncomputes the empirical expected log-likelihood (accounting for the\nnormalization terms and scaling), allowing for universal comparison (beyond\nthis software package)\n\nParameters\n----------\nemp_cov : ndarray of shape (n_features, n_features)\n Maximum Likelihood Estimator of covariance.\n\nprecision : ndarray of shape (n_features, n_features)\n The precision matrix of the covariance model to be tested.\n\nReturns\n-------\nlog_likelihood_ : float\n Sample mean of the log-likelihood." - }, - { - "name": "empirical_covariance", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data from which to compute the covariance estimate" - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data will not be centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False, data will be centered before computation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the Maximum likelihood covariance estimator\n\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data will be centered before computation.\n\nReturns\n-------\ncovariance : ndarray of shape (n_features, n_features)\n Empirical covariance (Maximum Likelihood Estimator).\n\nExamples\n--------\n>>> from sklearn.covariance import empirical_covariance\n>>> X = [[1,1,1],[1,1,1],[1,1,1],\n... 
[0,0,0],[0,0,0],[0,0,0]]\n>>> empirical_covariance(X)\narray([[0.25, 0.25, 0.25],\n [0.25, 0.25, 0.25],\n [0.25, 0.25, 0.25]])" - } - ] - }, - { - "name": "sklearn.covariance._graph_lasso", - "imports": [ - "from collections.abc import Sequence", - "import warnings", - "import operator", - "import sys", - "import time", - "import numpy as np", - "from scipy import linalg", - "from joblib import Parallel", - "from None import empirical_covariance", - "from None import EmpiricalCovariance", - "from None import log_likelihood", - "from exceptions import ConvergenceWarning", - "from utils.validation import check_random_state", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from linear_model import _cd_fast as cd_fast", - "from linear_model import lars_path_gram", - "from model_selection import check_cv", - "from model_selection import cross_val_score", - "from utils.deprecation import deprecated" - ], - "classes": [ - { - "name": "GraphicalLasso", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The regularization parameter: the higher alpha, the more regularization, the sparser the inverse covariance. Range is (0, inf]." - }, - { - "name": "mode", - "type": "Literal['cd', 'lars']", - "hasDefault": true, - "default": "'cd'", - "limitation": null, - "ignored": false, - "docstring": "The Lasso solver to use: coordinate descent or LARS. Use LARS for very sparse underlying graphs, where p > n. Elsewhere prefer cd which is more numerically stable." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance to declare convergence: if the dual gap goes below this value, iterations are stopped. Range is (0, inf]." 
- }, - { - "name": "enet_tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the elastic net solver used to calculate the descent direction. This parameter controls the accuracy of the search direction for a given column update, not of the overall parameter estimate. Only used for mode='cd'. Range is (0, inf]." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If verbose is True, the objective function and dual gap are plotted at each iteration." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data are not centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False, data are centered before computation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data from which to compute the covariance estimate" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fits the GraphicalLasso model to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object" - } - ], - "docstring": "Sparse inverse covariance estimation with an l1-penalized estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n GraphLasso has been renamed to GraphicalLasso\n\nParameters\n----------\nalpha : float, default=0.01\n The regularization parameter: the higher alpha, the more\n regularization, the sparser the inverse covariance.\n Range is (0, inf].\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where p > n. Elsewhere prefer cd\n which is more numerically stable.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. Range is (0, inf].\n\nmax_iter : int, default=100\n The maximum number of iterations.\n\nverbose : bool, default=False\n If verbose is True, the objective function and dual gap are\n plotted at each iteration.\n\nassume_centered : bool, default=False\n If True, data are not centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. 
the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n\nn_iter_ : int\n Number of iterations run.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import GraphicalLasso\n>>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n... [0.0, 0.4, 0.0, 0.0],\n... [0.2, 0.0, 0.3, 0.1],\n... [0.0, 0.0, 0.1, 0.7]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n... cov=true_cov,\n... size=200)\n>>> cov = GraphicalLasso().fit(X)\n>>> np.around(cov.covariance_, decimals=3)\narray([[0.816, 0.049, 0.218, 0.019],\n [0.049, 0.364, 0.017, 0.034],\n [0.218, 0.017, 0.322, 0.093],\n [0.019, 0.034, 0.093, 0.69 ]])\n>>> np.around(cov.location_, decimals=3)\narray([0.073, 0.04 , 0.038, 0.143])\n\nSee Also\n--------\ngraphical_lasso, GraphicalLassoCV" - }, - { - "name": "GraphicalLassoCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alphas", - "type": "Union[ArrayLike, int]", - "hasDefault": true, - "default": "4", - "limitation": null, - "ignored": false, - "docstring": "If an integer is given, it fixes the number of points on the grids of alpha to be used. If a list is given, it gives the grid to be used. See the notes in the class docstring for more details. Range is (0, inf] when floats given." - }, - { - "name": "n_refinements", - "type": "int", - "hasDefault": true, - "default": "4", - "limitation": null, - "ignored": false, - "docstring": "The number of times the grid is refined. Not used if explicit values of alphas are passed. Range is [1, inf)." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. 
Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.20 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance to declare convergence: if the dual gap goes below this value, iterations are stopped. Range is (0, inf]." - }, - { - "name": "enet_tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the elastic net solver used to calculate the descent direction. This parameter controls the accuracy of the search direction for a given column update, not of the overall parameter estimate. Only used for mode='cd'. Range is (0, inf]." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations." - }, - { - "name": "mode", - "type": "Literal['cd', 'lars']", - "hasDefault": true, - "default": "'cd'", - "limitation": null, - "ignored": false, - "docstring": "The Lasso solver to use: coordinate descent or LARS. Use LARS for very sparse underlying graphs, where number of features is greater than number of samples. Elsewhere prefer cd which is more numerically stable." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. 
versionchanged:: v0.20 `n_jobs` default changed from 1 to None" - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If verbose is True, the objective function and duality gap are printed at each iteration." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data are not centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False, data are centered before computation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data from which to compute the covariance estimate" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fits the GraphicalLasso covariance model to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object" - }, - { - "name": "grid_scores_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "cv_alphas_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n GraphLassoCV has been renamed to GraphicalLassoCV\n\nParameters\n----------\nalphas : int or array-like of shape (n_alphas,), dtype=float, default=4\n If an integer is given, it fixes the number of points on the\n grids of alpha to be used. If a list is given, it gives the\n grid to be used. See the notes in the class docstring for\n more details. Range is (0, inf] when floats given.\n\nn_refinements : int, default=4\n The number of times the grid is refined. Not used if explicit\n values of alphas are passed. Range is [1, inf).\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. 
versionchanged:: 0.20\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. Range is (0, inf].\n\nmax_iter : int, default=100\n Maximum number of iterations.\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where number of features is greater\n than number of samples. Elsewhere prefer cd which is more numerically\n stable.\n\nn_jobs : int, default=None\n number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\nverbose : bool, default=False\n If verbose is True, the objective function and duality gap are\n printed at each iteration.\n\nassume_centered : bool, default=False\n If True, data are not centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated precision matrix (inverse covariance).\n\nalpha_ : float\n Penalization parameter selected.\n\ncv_alphas_ : list of shape (n_alphas,), dtype=float\n All penalization parameters explored.\n\n .. 
deprecated:: 0.24\n The `cv_alphas_` attribute is deprecated in version 0.24 in favor\n of `cv_results_['alphas']` and will be removed in version\n 1.1 (renaming of 0.26).\n\ngrid_scores_ : ndarray of shape (n_alphas, n_folds)\n Log-likelihood score on left-out data across folds.\n\n .. deprecated:: 0.24\n The `grid_scores_` attribute is deprecated in version 0.24 in favor\n of `cv_results_` and will be removed in version\n 1.1 (renaming of 0.26).\n\ncv_results_ : dict of ndarrays\n A dict with keys:\n\n alphas : ndarray of shape (n_alphas,)\n All penalization parameters explored.\n\n split(k)_score : ndarray of shape (n_alphas,)\n Log-likelihood score on left-out data across (k)th fold.\n\n mean_score : ndarray of shape (n_alphas,)\n Mean of scores over the folds.\n\n std_score : ndarray of shape (n_alphas,)\n Standard deviation of scores over the folds.\n\n .. versionadded:: 0.24\n\nn_iter_ : int\n Number of iterations run for the optimal alpha.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import GraphicalLassoCV\n>>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n... [0.0, 0.4, 0.0, 0.0],\n... [0.2, 0.0, 0.3, 0.1],\n... [0.0, 0.0, 0.1, 0.7]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n... cov=true_cov,\n... 
size=200)\n>>> cov = GraphicalLassoCV().fit(X)\n>>> np.around(cov.covariance_, decimals=3)\narray([[0.816, 0.051, 0.22 , 0.017],\n [0.051, 0.364, 0.018, 0.036],\n [0.22 , 0.018, 0.322, 0.094],\n [0.017, 0.036, 0.094, 0.69 ]])\n>>> np.around(cov.location_, decimals=3)\narray([0.073, 0.04 , 0.038, 0.143])\n\nSee Also\n--------\ngraphical_lasso, GraphicalLasso\n\nNotes\n-----\nThe search for the optimal penalization parameter (alpha) is done on an\niteratively refined grid: first the cross-validated scores on a grid are\ncomputed, then a new refined grid is centered around the maximum, and so\non.\n\nOne of the challenges which is faced here is that the solvers can\nfail to converge to a well-conditioned estimate. The corresponding\nvalues of alpha then come out as missing values, but the optimum may\nbe close to these missing values." - } - ], - "functions": [ - { - "name": "_objective", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluation of the graphical-lasso objective function\n\nthe objective function is made of a shifted scaled version of the\nnormalized log-likelihood (i.e. its empirical mean over the samples) and a\npenalisation term to promote sparsity" - }, - { - "name": "_dual_gap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Expression of the dual gap convergence criterion\n\nThe specific definition is given in Duchi \"Projected Subgradient Methods\nfor Learning Sparse Gaussians\"." - }, - { - "name": "alpha_max", - "decorators": [], - "parameters": [ - { - "name": "emp_cov", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sample covariance matrix." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the maximum alpha for which there are some non-zeros off-diagonal.\n\nParameters\n----------\nemp_cov : ndarray of shape (n_features, n_features)\n The sample covariance matrix.\n\nNotes\n-----\nThis results from the bound for the all the Lasso that are solved\nin GraphicalLasso: each time, the row of cov corresponds to Xy. As the\nbound for alpha is given by `max(abs(Xy))`, the result follows." - }, - { - "name": "graphical_lasso", - "decorators": [], - "parameters": [ - { - "name": "emp_cov", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Empirical covariance from which to compute the covariance estimate." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The regularization parameter: the higher alpha, the more regularization, the sparser the inverse covariance. Range is (0, inf]." - }, - { - "name": "cov_init", - "type": "Array", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial guess for the covariance. If None, then the empirical covariance is used." - }, - { - "name": "mode", - "type": "Literal['cd', 'lars']", - "hasDefault": true, - "default": "'cd'", - "limitation": null, - "ignored": false, - "docstring": "The Lasso solver to use: coordinate descent or LARS. Use LARS for very sparse underlying graphs, where p > n. Elsewhere prefer cd which is more numerically stable." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance to declare convergence: if the dual gap goes below this value, iterations are stopped. Range is (0, inf]." 
- }, - { - "name": "enet_tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the elastic net solver used to calculate the descent direction. This parameter controls the accuracy of the search direction for a given column update, not of the overall parameter estimate. Only used for mode='cd'. Range is (0, inf]." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If verbose is True, the objective function and dual gap are printed at each iteration." - }, - { - "name": "return_costs", - "type": "bool", - "hasDefault": true, - "default": "Flase", - "limitation": null, - "ignored": false, - "docstring": "If return_costs is True, the objective function and dual gap at each iteration are returned." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "eps", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Default is `np.finfo(np.float64).eps`." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "l1-penalized covariance estimator\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionchanged:: v0.20\n graph_lasso has been renamed to graphical_lasso\n\nParameters\n----------\nemp_cov : ndarray of shape (n_features, n_features)\n Empirical covariance from which to compute the covariance estimate.\n\nalpha : float\n The regularization parameter: the higher alpha, the more\n regularization, the sparser the inverse covariance.\n Range is (0, inf].\n\ncov_init : array of shape (n_features, n_features), default=None\n The initial guess for the covariance. If None, then the empirical\n covariance is used.\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where p > n. Elsewhere prefer cd\n which is more numerically stable.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. Range is (0, inf].\n\nmax_iter : int, default=100\n The maximum number of iterations.\n\nverbose : bool, default=False\n If verbose is True, the objective function and dual gap are\n printed at each iteration.\n\nreturn_costs : bool, default=Flase\n If return_costs is True, the objective function and dual gap\n at each iteration are returned.\n\neps : float, default=eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. 
Default is `np.finfo(np.float64).eps`.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\ncovariance : ndarray of shape (n_features, n_features)\n The estimated covariance matrix.\n\nprecision : ndarray of shape (n_features, n_features)\n The estimated (sparse) precision matrix.\n\ncosts : list of (objective, dual_gap) pairs\n The list of values of the objective function and the dual gap at\n each iteration. Returned only if return_costs is True.\n\nn_iter : int\n Number of iterations. Returned only if `return_n_iter` is set to True.\n\nSee Also\n--------\nGraphicalLasso, GraphicalLassoCV\n\nNotes\n-----\nThe algorithm employed to solve this problem is the GLasso algorithm,\nfrom the Friedman 2008 Biostatistics paper. It is the same algorithm\nas in the R `glasso` package.\n\nOne possible difference with the `glasso` R package is that the\ndiagonal coefficients are not penalized." - }, - { - "name": "graphical_lasso_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data from which to compute the covariance estimate." - }, - { - "name": "alphas", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The list of regularization parameters, decreasing order." - }, - { - "name": "cov_init", - "type": "Array", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial guess for the covariance." - }, - { - "name": "X_test", - "type": "Array", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Optional test matrix to measure generalisation error." 
- }, - { - "name": "mode", - "type": "Literal['cd', 'lars']", - "hasDefault": true, - "default": "'cd'", - "limitation": null, - "ignored": false, - "docstring": "The Lasso solver to use: coordinate descent or LARS. Use LARS for very sparse underlying graphs, where p > n. Elsewhere prefer cd which is more numerically stable." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance to declare convergence: if the dual gap goes below this value, iterations are stopped. The tolerance must be a positive number." - }, - { - "name": "enet_tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the elastic net solver used to calculate the descent direction. This parameter controls the accuracy of the search direction for a given column update, not of the overall parameter estimate. Only used for mode='cd'. The tolerance must be a positive number." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations. This parameter should be a strictly positive integer." - }, - { - "name": "verbose", - "type": "Union[bool, int]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "The higher the verbosity flag, the more information is printed during the fitting." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "l1-penalized covariance estimator along a path of decreasing alphas\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data from which to compute the covariance estimate.\n\nalphas : array-like of shape (n_alphas,)\n The list of regularization parameters, decreasing order.\n\ncov_init : array of shape (n_features, n_features), default=None\n The initial guess for the covariance.\n\nX_test : array of shape (n_test_samples, n_features), default=None\n Optional test matrix to measure generalisation error.\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where p > n. Elsewhere prefer cd\n which is more numerically stable.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. The tolerance must be a positive\n number.\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. The tolerance must be a positive number.\n\nmax_iter : int, default=100\n The maximum number of iterations. 
This parameter should be a strictly\n positive integer.\n\nverbose : int or bool, default=False\n The higher the verbosity flag, the more information is printed\n during the fitting.\n\nReturns\n-------\ncovariances_ : list of shape (n_alphas,) of ndarray of shape (n_features, n_features)\n The estimated covariance matrices.\n\nprecisions_ : list of shape (n_alphas,) of ndarray of shape (n_features, n_features)\n The estimated (sparse) precision matrices.\n\nscores_ : list of shape (n_alphas,), dtype=float\n The generalisation error (log-likelihood) on the test data.\n Returned only if test data is passed." - } - ] - }, - { - "name": "sklearn.covariance._robust_covariance", - "imports": [ - "import warnings", - "import numbers", - "import numpy as np", - "from scipy import linalg", - "from scipy.stats import chi2", - "from None import empirical_covariance", - "from None import EmpiricalCovariance", - "from utils.extmath import fast_logdet", - "from utils import check_random_state", - "from utils import check_array", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "MinCovDet", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "store_precision", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specify if the estimated precision is stored." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the support of the robust location and the covariance estimates is computed, and a covariance estimate is recomputed from it, without centering the data. Useful to work with data whose mean is significantly equal to zero but is not exactly zero. If False, the robust location and covariance are directly computed with the FastMCD algorithm without additional treatment." 
- }, - { - "name": "support_fraction", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The proportion of points to be included in the support of the raw MCD estimate. Default is None, which implies that the minimum value of support_fraction will be used within the algorithm: `(n_sample + n_features + 1) / 2`. The parameter must be in the range (0, 1)." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the pseudo random number generator for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where `n_samples` is the number of samples and `n_features` is the number of features." - }, - { - "name": "y: Ignored", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Not used, present for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fits a Minimum Covariance Determinant with the FastMCD algorithm.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\n\ny: Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object" - }, - { - "name": "correct_covariance", - "decorators": [], - "parameters": [ - { - "name": "data", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix, with p features and n samples. The data set must be the one which was used to compute the raw estimates." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply a correction to raw Minimum Covariance Determinant estimates.\n\nCorrection using the empirical correction factor suggested\nby Rousseeuw and Van Driessen in [RVD]_.\n\nParameters\n----------\ndata : array-like of shape (n_samples, n_features)\n The data matrix, with p features and n samples.\n The data set must be the one which was used to compute\n the raw estimates.\n\nReturns\n-------\ncovariance_corrected : ndarray of shape (n_features, n_features)\n Corrected robust covariance estimate.\n\nReferences\n----------\n\n.. [RVD] A Fast Algorithm for the Minimum Covariance\n Determinant Estimator, 1999, American Statistical Association\n and the American Society for Quality, TECHNOMETRICS" - }, - { - "name": "reweight_covariance", - "decorators": [], - "parameters": [ - { - "name": "data", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix, with p features and n samples. The data set must be the one which was used to compute the raw estimates." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Re-weight raw Minimum Covariance Determinant estimates.\n\nRe-weight observations using Rousseeuw's method (equivalent to\ndeleting outlying observations from the data set before\ncomputing location and covariance estimates) described\nin [RVDriessen]_.\n\nParameters\n----------\ndata : array-like of shape (n_samples, n_features)\n The data matrix, with p features and n samples.\n The data set must be the one which was used to compute\n the raw estimates.\n\nReturns\n-------\nlocation_reweighted : ndarray of shape (n_features,)\n Re-weighted robust location estimate.\n\ncovariance_reweighted : ndarray of shape (n_features, n_features)\n Re-weighted robust covariance estimate.\n\nsupport_reweighted : ndarray of shape (n_samples,), dtype=bool\n A mask of the observations that have been used to compute\n the re-weighted robust location and covariance estimates.\n\nReferences\n----------\n\n.. [RVDriessen] A Fast Algorithm for the Minimum Covariance\n Determinant Estimator, 1999, American Statistical Association\n and the American Society for Quality, TECHNOMETRICS" - } - ], - "docstring": "Minimum Covariance Determinant (MCD): robust estimator of covariance.\n\nThe Minimum Covariance Determinant covariance estimator is to be applied\non Gaussian-distributed data, but could still be relevant on data\ndrawn from a unimodal, symmetric distribution. 
It is not meant to be used\nwith multi-modal data (the algorithm used to fit a MinCovDet object is\nlikely to fail in such a case).\nOne should consider projection pursuit methods to deal with multi-modal\ndatasets.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, the support of the robust location and the covariance\n estimates is computed, and a covariance estimate is recomputed from\n it, without centering the data.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, the robust location and covariance are directly computed\n with the FastMCD algorithm without additional treatment.\n\nsupport_fraction : float, default=None\n The proportion of points to be included in the support of the raw\n MCD estimate. Default is None, which implies that the minimum\n value of support_fraction will be used within the algorithm:\n `(n_sample + n_features + 1) / 2`. 
The parameter must be in the range\n (0, 1).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nAttributes\n----------\nraw_location_ : ndarray of shape (n_features,)\n The raw robust estimated location before correction and re-weighting.\n\nraw_covariance_ : ndarray of shape (n_features, n_features)\n The raw robust estimated covariance before correction and re-weighting.\n\nraw_support_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute\n the raw robust estimates of location and shape, before correction\n and re-weighting.\n\nlocation_ : ndarray of shape (n_features,)\n Estimated robust location.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated robust covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nsupport_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute\n the robust estimates of location and shape.\n\ndist_ : ndarray of shape (n_samples,)\n Mahalanobis distances of the training set (on which :meth:`fit` is\n called) observations.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import MinCovDet\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... size=500)\n>>> cov = MinCovDet(random_state=0).fit(X)\n>>> cov.covariance_\narray([[0.7411..., 0.2535...],\n [0.2535..., 0.3053...]])\n>>> cov.location_\narray([0.0813... , 0.0427...])\n\nReferences\n----------\n\n.. [Rouseeuw1984] P. J. Rousseeuw. Least median of squares regression.\n J. Am Stat Ass, 79:871, 1984.\n.. 
[Rousseeuw] A Fast Algorithm for the Minimum Covariance Determinant\n Estimator, 1999, American Statistical Association and the American\n Society for Quality, TECHNOMETRICS\n.. [ButlerDavies] R. W. Butler, P. L. Davies and M. Jhun,\n Asymptotics For The Minimum Covariance Determinant Estimator,\n The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400" - } - ], - "functions": [ - { - "name": "c_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data set in which we look for the n_support observations whose scatter matrix has minimum determinant." - }, - { - "name": "n_support", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of observations to compute the robust estimates of location and covariance from. This parameter must be greater than `n_samples / 2`." - }, - { - "name": "remaining_iterations", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations to perform. According to [Rouseeuw1999]_, two iterations are sufficient to get close to the minimum, and we never need more than 30 to reach convergence." - }, - { - "name": "initial_estimates", - "type": "Tuple[]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial estimates of location and shape from which to run the c_step procedure: - initial_estimates[0]: an initial location estimate - initial_estimates[1]: an initial covariance estimate" - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbose mode." 
- }, - { - "name": "cov_computation_method", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The function which will be used to compute the covariance. Must return array of shape (n_features, n_features)." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the pseudo random number generator for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "C_step procedure described in [Rouseeuw1984]_ aiming at computing MCD.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data set in which we look for the n_support observations whose\n scatter matrix has minimum determinant.\n\nn_support : int\n Number of observations to compute the robust estimates of location\n and covariance from. 
This parameter must be greater than\n `n_samples / 2`.\n\nremaining_iterations : int, default=30\n Number of iterations to perform.\n According to [Rouseeuw1999]_, two iterations are sufficient to get\n close to the minimum, and we never need more than 30 to reach\n convergence.\n\ninitial_estimates : tuple of shape (2,), default=None\n Initial estimates of location and shape from which to run the c_step\n procedure:\n - initial_estimates[0]: an initial location estimate\n - initial_estimates[1]: an initial covariance estimate\n\nverbose : bool, default=False\n Verbose mode.\n\ncov_computation_method : callable, default=:func:`sklearn.covariance.empirical_covariance`\n The function which will be used to compute the covariance.\n Must return array of shape (n_features, n_features).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nReturns\n-------\nlocation : ndarray of shape (n_features,)\n Robust location estimates.\n\ncovariance : ndarray of shape (n_features, n_features)\n Robust covariance estimates.\n\nsupport : ndarray of shape (n_samples,)\n A mask for the `n_support` observations whose scatter matrix has\n minimum determinant.\n\nReferences\n----------\n.. [Rouseeuw1999] A Fast Algorithm for the Minimum Covariance Determinant\n Estimator, 1999, American Statistical Association and the American\n Society for Quality, TECHNOMETRICS" - }, - { - "name": "_c_step", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "select_candidates", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data (sub)set in which we look for the n_support purest observations." 
- }, - { - "name": "n_support", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of samples the pure data set must contain. This parameter must be in the range `[(n + p + 1)/2] < n_support < n`." - }, - { - "name": "n_trials", - "type": "Union[Tuple[], int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of different initial sets of observations from which to run the algorithm. This parameter should be a strictly positive integer. Instead of giving a number of trials to perform, one can provide a list of initial estimates that will be used to iteratively run c_step procedures. In this case: - n_trials[0]: array-like, shape (n_trials, n_features) is the list of `n_trials` initial location estimates - n_trials[1]: array-like, shape (n_trials, n_features, n_features) is the list of `n_trials` initial covariances estimates" - }, - { - "name": "select", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Number of best candidates results to return. This parameter must be a strictly positive integer." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for the c_step procedure. (2 is enough to be close to the final solution. \"Never\" exceeds 20). This parameter must be a strictly positive integer." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Control the output verbosity." - }, - { - "name": "cov_computation_method", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The function which will be used to compute the covariance. 
Must return an array of shape (n_features, n_features)." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the pseudo random number generator for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Finds the best pure subset of observations to compute MCD from it.\n\nThe purpose of this function is to find the best sets of n_support\nobservations with respect to a minimization of their covariance\nmatrix determinant. Equivalently, it removes n_samples-n_support\nobservations to construct what we call a pure data set (i.e. not\ncontaining outliers). The list of the observations of the pure\ndata set is referred to as the `support`.\n\nStarting from a random support, the pure data set is found by the\nc_step procedure introduced by Rousseeuw and Van Driessen in\n[RV]_.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data (sub)set in which we look for the n_support purest observations.\n\nn_support : int\n The number of samples the pure data set must contain.\n This parameter must be in the range `[(n + p + 1)/2] < n_support < n`.\n\nn_trials : int or tuple of shape (2,)\n Number of different initial sets of observations from which to\n run the algorithm. This parameter should be a strictly positive\n integer.\n Instead of giving a number of trials to perform, one can provide a\n list of initial estimates that will be used to iteratively run\n c_step procedures. In this case:\n - n_trials[0]: array-like, shape (n_trials, n_features)\n is the list of `n_trials` initial location estimates\n - n_trials[1]: array-like, shape (n_trials, n_features, n_features)\n is the list of `n_trials` initial covariances estimates\n\nselect : int, default=1\n Number of best candidates results to return. 
This parameter must be\n a strictly positive integer.\n\nn_iter : int, default=30\n Maximum number of iterations for the c_step procedure.\n (2 is enough to be close to the final solution. \"Never\" exceeds 20).\n This parameter must be a strictly positive integer.\n\nverbose : bool, default=False\n Control the output verbosity.\n\ncov_computation_method : callable, default=:func:`sklearn.covariance.empirical_covariance`\n The function which will be used to compute the covariance.\n Must return an array of shape (n_features, n_features).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nSee Also\n---------\nc_step\n\nReturns\n-------\nbest_locations : ndarray of shape (select, n_features)\n The `select` location estimates computed from the `select` best\n supports found in the data set (`X`).\n\nbest_covariances : ndarray of shape (select, n_features, n_features)\n The `select` covariance estimates computed from the `select`\n best supports found in the data set (`X`).\n\nbest_supports : ndarray of shape (select, n_samples)\n The `select` best supports found in the data set (`X`).\n\nReferences\n----------\n.. [RV] A Fast Algorithm for the Minimum Covariance Determinant\n Estimator, 1999, American Statistical Association and the American\n Society for Quality, TECHNOMETRICS" - }, - { - "name": "fast_mcd", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix, with p features and n samples." - }, - { - "name": "support_fraction", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The proportion of points to be included in the support of the raw MCD estimate. 
Default is `None`, which implies that the minimum value of `support_fraction` will be used within the algorithm: `(n_sample + n_features + 1) / 2`. This parameter must be in the range (0, 1)." - }, - { - "name": "cov_computation_method", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The function which will be used to compute the covariance. Must return an array of shape (n_features, n_features)." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the pseudo random number generator for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimates the Minimum Covariance Determinant matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix, with p features and n samples.\n\nsupport_fraction : float, default=None\n The proportion of points to be included in the support of the raw\n MCD estimate. Default is `None`, which implies that the minimum\n value of `support_fraction` will be used within the algorithm:\n `(n_sample + n_features + 1) / 2`. 
This parameter must be in the\n range (0, 1).\n\ncov_computation_method : callable, default=:func:`sklearn.covariance.empirical_covariance`\n The function which will be used to compute the covariance.\n Must return an array of shape (n_features, n_features).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nReturns\n-------\nlocation : ndarray of shape (n_features,)\n Robust location of the data.\n\ncovariance : ndarray of shape (n_features, n_features)\n Robust covariance of the features.\n\nsupport : ndarray of shape (n_samples,), dtype=bool\n A mask of the observations that have been used to compute\n the robust location and covariance estimates of the data set.\n\nNotes\n-----\nThe FastMCD algorithm has been introduced by Rousseuw and Van Driessen\nin \"A Fast Algorithm for the Minimum Covariance Determinant Estimator,\n1999, American Statistical Association and the American Society\nfor Quality, TECHNOMETRICS\".\nThe principle is to compute robust estimates and random subsets before\npooling them into a larger subsets, and finally into the full data set.\nDepending on the size of the initial sample, we have one, two or three\nsuch computation levels.\n\nNote that only raw estimates are returned. If one is interested in\nthe correction and reweighting steps described in [RouseeuwVan]_,\nsee the MinCovDet object.\n\nReferences\n----------\n\n.. [RouseeuwVan] A Fast Algorithm for the Minimum Covariance\n Determinant Estimator, 1999, American Statistical Association\n and the American Society for Quality, TECHNOMETRICS\n\n.. [Butler1993] R. W. Butler, P. L. Davies and M. Jhun,\n Asymptotics For The Minimum Covariance Determinant Estimator,\n The Annals of Statistics, 1993, Vol. 21, No. 
3, 1385-1400" - } - ] - }, - { - "name": "sklearn.covariance._shrunk_covariance", - "imports": [ - "import warnings", - "import numpy as np", - "from None import empirical_covariance", - "from None import EmpiricalCovariance", - "from utils import check_array", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "ShrunkCovariance", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "store_precision", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specify if the estimated precision is stored" - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data will not be centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False, data will be centered before computation." - }, - { - "name": "shrinkage", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Coefficient in the convex combination used for the computation of the shrunk estimate. Range is [0, 1]." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y: Ignored", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Not used, present for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the shrunk covariance model according to the given training data\nand parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny: Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object" - } - ], - "docstring": "Covariance estimator with shrinkage\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data will be centered before computation.\n\nshrinkage : float, default=0.1\n Coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\nAttributes\n----------\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix\n\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import ShrunkCovariance\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... 
size=500)\n>>> cov = ShrunkCovariance().fit(X)\n>>> cov.covariance_\narray([[0.7387..., 0.2536...],\n [0.2536..., 0.4110...]])\n>>> cov.location_\narray([0.0622..., 0.0193...])\n\nNotes\n-----\nThe regularized covariance is given by:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features" - }, - { - "name": "LedoitWolf", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "store_precision", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specify if the estimated precision is stored." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data will not be centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False (default), data will be centered before computation." - }, - { - "name": "block_size", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Size of blocks into which the covariance matrix will be split during its Ledoit-Wolf estimation. This is purely a memory optimization and does not affect results." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where `n_samples` is the number of samples and `n_features` is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the Ledoit-Wolf shrunk covariance model according to the given\ntraining data and parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object" - } - ], - "docstring": "LedoitWolf Estimator\n\nLedoit-Wolf is a particular form of shrinkage, where the shrinkage\ncoefficient is computed using O. Ledoit and M. Wolf's formula as\ndescribed in \"A Well-Conditioned Estimator for Large-Dimensional\nCovariance Matrices\", Ledoit and Wolf, Journal of Multivariate\nAnalysis, Volume 88, Issue 2, February 2004, pages 365-411.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False (default), data will be centered before computation.\n\nblock_size : int, default=1000\n Size of blocks into which the covariance matrix will be split\n during its Ledoit-Wolf estimation. This is purely a memory\n optimization and does not affect results.\n\nAttributes\n----------\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.\n\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nshrinkage_ : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate. 
Range is [0, 1].\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import LedoitWolf\n>>> real_cov = np.array([[.4, .2],\n... [.2, .8]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... size=50)\n>>> cov = LedoitWolf().fit(X)\n>>> cov.covariance_\narray([[0.4406..., 0.1616...],\n [0.1616..., 0.8022...]])\n>>> cov.location_\narray([ 0.0595... , -0.0075...])\n\nNotes\n-----\nThe regularised covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features\nand shrinkage is given by the Ledoit and Wolf formula (see References)\n\nReferences\n----------\n\"A Well-Conditioned Estimator for Large-Dimensional Covariance Matrices\",\nLedoit and Wolf, Journal of Multivariate Analysis, Volume 88, Issue 2,\nFebruary 2004, pages 365-411." - }, - { - "name": "OAS", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "store_precision", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specify if the estimated precision is stored." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data will not be centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False (default), data will be centered before computation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where `n_samples` is the number of samples and `n_features` is the number of features." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the Oracle Approximating Shrinkage covariance model\naccording to the given training data and parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object" - } - ], - "docstring": "Oracle Approximating Shrinkage Estimator\n\nRead more in the :ref:`User Guide `.\n\nOAS is a particular form of shrinkage described in\n\"Shrinkage Algorithms for MMSE Covariance Estimation\"\nChen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.\n\nThe formula used here does not correspond to the one given in the\narticle. In the original article, formula (23) states that 2/p is\nmultiplied by Trace(cov*cov) in both the numerator and denominator, but\nthis operation is omitted because for a large p, the value of 2/p is\nso small that it doesn't affect the value of the estimator.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False (default), data will be centered before computation.\n\nAttributes\n----------\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.\n\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. 
the estimated mean.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nshrinkage_ : float\n coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import OAS\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... size=500)\n>>> oas = OAS().fit(X)\n>>> oas.covariance_\narray([[0.7533..., 0.2763...],\n [0.2763..., 0.3964...]])\n>>> oas.precision_\narray([[ 1.7833..., -1.2431... ],\n [-1.2431..., 3.3889...]])\n>>> oas.shrinkage_\n0.0195...\n\nNotes\n-----\nThe regularised covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features\nand shrinkage is given by the OAS formula (see References)\n\nReferences\n----------\n\"Shrinkage Algorithms for MMSE Covariance Estimation\"\nChen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010." - } - ], - "functions": [ - { - "name": "shrunk_covariance", - "decorators": [], - "parameters": [ - { - "name": "emp_cov", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Covariance matrix to be shrunk" - }, - { - "name": "shrinkage", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Coefficient in the convex combination used for the computation of the shrunk estimate. Range is [0, 1]." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculates a covariance matrix shrunk on the diagonal\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nemp_cov : array-like of shape (n_features, n_features)\n Covariance matrix to be shrunk\n\nshrinkage : float, default=0.1\n Coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\nReturns\n-------\nshrunk_cov : ndarray of shape (n_features, n_features)\n Shrunk covariance.\n\nNotes\n-----\nThe regularized (shrunk) covariance is given by:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features" - }, - { - "name": "ledoit_wolf_shrinkage", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data from which to compute the Ledoit-Wolf shrunk covariance shrinkage." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data will not be centered before computation. Useful to work with data whose mean is significantly equal to zero but is not exactly zero. If False, data will be centered before computation." - }, - { - "name": "block_size", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Size of blocks into which the covariance matrix will be split." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimates the shrunk Ledoit-Wolf covariance matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the Ledoit-Wolf shrunk covariance shrinkage.\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, data will be centered before computation.\n\nblock_size : int, default=1000\n Size of blocks into which the covariance matrix will be split.\n\nReturns\n-------\nshrinkage : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate.\n\nNotes\n-----\nThe regularized (shrunk) covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features" - }, - { - "name": "ledoit_wolf", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data from which to compute the covariance estimate" - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data will not be centered before computation. Useful to work with data whose mean is significantly equal to zero but is not exactly zero. If False, data will be centered before computation." - }, - { - "name": "block_size", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Size of blocks into which the covariance matrix will be split. This is purely a memory optimization and does not affect results." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimates the shrunk Ledoit-Wolf covariance matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, data will be centered before computation.\n\nblock_size : int, default=1000\n Size of blocks into which the covariance matrix will be split.\n This is purely a memory optimization and does not affect results.\n\nReturns\n-------\nshrunk_cov : ndarray of shape (n_features, n_features)\n Shrunk covariance.\n\nshrinkage : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate.\n\nNotes\n-----\nThe regularized (shrunk) covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features" - }, - { - "name": "oas", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data from which to compute the covariance estimate." - }, - { - "name": "assume_centered", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, data will not be centered before computation. Useful to work with data whose mean is significantly equal to zero but is not exactly zero. If False, data will be centered before computation." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate covariance with the Oracle Approximating Shrinkage algorithm.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate.\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, data will be centered before computation.\n\nReturns\n-------\nshrunk_cov : array-like of shape (n_features, n_features)\n Shrunk covariance.\n\nshrinkage : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate.\n\nNotes\n-----\nThe regularised (shrunk) covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features\n\nThe formula we used to implement the OAS is slightly modified compared\nto the one given in the article. See :class:`OAS` for more details." 
- } - ] - }, - { - "name": "sklearn.covariance", - "imports": [ - "from _empirical_covariance import empirical_covariance", - "from _empirical_covariance import EmpiricalCovariance", - "from _empirical_covariance import log_likelihood", - "from _shrunk_covariance import shrunk_covariance", - "from _shrunk_covariance import ShrunkCovariance", - "from _shrunk_covariance import ledoit_wolf", - "from _shrunk_covariance import ledoit_wolf_shrinkage", - "from _shrunk_covariance import LedoitWolf", - "from _shrunk_covariance import oas", - "from _shrunk_covariance import OAS", - "from _robust_covariance import fast_mcd", - "from _robust_covariance import MinCovDet", - "from _graph_lasso import graphical_lasso", - "from _graph_lasso import GraphicalLasso", - "from _graph_lasso import GraphicalLassoCV", - "from _elliptic_envelope import EllipticEnvelope" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.covariance.tests.test_covariance", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn import datasets", - "from sklearn.covariance import empirical_covariance", - "from sklearn.covariance import EmpiricalCovariance", - "from sklearn.covariance import ShrunkCovariance", - "from sklearn.covariance import shrunk_covariance", - "from sklearn.covariance import LedoitWolf", - "from sklearn.covariance import ledoit_wolf", - "from sklearn.covariance import ledoit_wolf_shrinkage", - "from sklearn.covariance import OAS", - "from sklearn.covariance import oas" - ], - "classes": [], - "functions": [ - { - "name": "test_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shrunk_covariance", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ledoit_wolf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_naive_ledoit_wolf_shrinkage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ledoit_wolf_small", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ledoit_wolf_large", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.covariance.tests.test_elliptic_envelope", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.covariance import EllipticEnvelope", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.exceptions import NotFittedError" - ], - "classes": [], - "functions": [ - { - "name": "test_elliptic_envelope", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.covariance.tests.test_graphical_lasso", - "imports": [ - "import sys", - "import pytest", - "import numpy as np", - "from scipy import linalg", - "from numpy.testing import assert_allclose", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_less", - "from sklearn.covariance import 
graphical_lasso", - "from sklearn.covariance import GraphicalLasso", - "from sklearn.covariance import GraphicalLassoCV", - "from sklearn.covariance import empirical_covariance", - "from sklearn.datasets import make_sparse_spd_matrix", - "from io import StringIO", - "from sklearn.utils import check_random_state", - "from sklearn import datasets" - ], - "classes": [], - "functions": [ - { - "name": "test_graphical_lasso", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_graphical_lasso_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_graph_lasso_2D", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_graphical_lasso_iris_singular", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_graphical_lasso_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_graphical_lasso_cv_grid_scores_and_cv_alphas_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_graphical_lasso_cv_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.covariance.tests.test_robust_covariance", - "imports": [ - "import itertools", - "import numpy as np", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn import datasets", - "from sklearn.covariance import empirical_covariance", - "from sklearn.covariance import MinCovDet", - "from 
sklearn.covariance import fast_mcd" - ], - "classes": [], - "functions": [ - { - "name": "test_mcd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fast_mcd_on_invalid_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mcd_class_on_invalid_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "launch_mcd_on_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mcd_issue1127", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mcd_issue3367", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mcd_support_covariance_is_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mcd_increasing_det_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.covariance.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.cross_decomposition._pls", - "imports": [ - "import warnings", - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numpy as np", - "from scipy.linalg import pinv2", - "from scipy.linalg import svd", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from base import TransformerMixin", - "from base import MultiOutputMixin", - "from utils import check_array", - "from utils import check_consistent_length", - "from utils.extmath import svd_flip", - "from utils.validation import check_is_fitted", - 
"from utils.validation import FLOAT_DTYPES", - "from utils.validation import _deprecate_positional_args", - "from exceptions import ConvergenceWarning", - "from utils.deprecation import deprecated" - ], - "classes": [ - { - "name": "_PLS", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where `n_samples` is the number of samples and `n_features` is the number of predictors." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vectors, where `n_samples` is the number of samples and `n_targets` is the number of response variables." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit model to data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of predictors.\n\nY : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target vectors, where `n_samples` is the number of samples and\n `n_targets` is the number of response variables." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples to transform." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vectors." 
- }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy `X` and `Y`, or perform in-place normalization." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Apply the dimension reduction.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Samples to transform.\n\nY : array-like of shape (n_samples, n_targets), default=None\n Target vectors.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y`, or perform in-place normalization.\n\nReturns\n-------\n`x_scores` if `Y` is not given, `(x_scores, y_scores)` otherwise." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data, where `n_samples` is the number of samples and `n_components` is the number of pls components." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform data back to its original space.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_components)\n New data, where `n_samples` is the number of samples\n and `n_components` is the number of pls components.\n\nReturns\n-------\nx_reconstructed : array-like of shape (n_samples, n_features)\n\nNotes\n-----\nThis transformation will only be exact if `n_components=n_features`." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy `X` and `Y`, or perform in-place normalization." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict targets of given samples.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Samples.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y`, or perform in-place normalization.\n\nNotes\n-----\nThis call requires the estimation of a matrix of shape\n`(n_features, n_targets)`, which may be an issue in high dimensional\nspace." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of predictors." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vectors, where n_samples is the number of samples and n_targets is the number of response variables." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn and apply the dimension reduction on the train data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of predictors.\n\ny : array-like of shape (n_samples, n_targets), default=None\n Target vectors, where n_samples is the number of samples and\n n_targets is the number of response variables.\n\nReturns\n-------\nx_scores if Y is not given, (x_scores, y_scores) otherwise." 
- }, - { - "name": "norm_y_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "x_mean_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "y_mean_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "x_std_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "y_std_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "x_scores_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "y_scores_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Partial Least Squares (PLS)\n\nThis class implements the generic PLS algorithm.\n\nMain ref: Wegelin, a survey of Partial Least Squares (PLS) methods,\nwith emphasis on the two-block case\nhttps://www.stat.washington.edu/research/reports/2000/tr371.pdf" - }, - { - "name": "PLSRegression", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of components to keep. Should be in `[1, min(n_samples, n_features, n_targets)]`." - }, - { - "name": "scale", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to scale `X` and `Y`." 
- }, - { - "name": "algorithm", - "type": "Literal['nipals', 'svd']", - "hasDefault": true, - "default": "'nipals'", - "limitation": null, - "ignored": false, - "docstring": "The algorithm used to estimate the first singular vectors of the cross-covariance matrix. 'nipals' uses the power method while 'svd' will compute the whole SVD." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations of the power method when `algorithm='nipals'`. Ignored otherwise." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-06", - "limitation": null, - "ignored": false, - "docstring": "The tolerance used as convergence criteria in the power method: the algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less than `tol`, where `u` corresponds to the left singular vector." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy `X` and `Y` in fit before applying centering, and potentially scaling. If False, these operations will be done inplace, modifying both arrays." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "PLS regression\n\nPLSRegression is also known as PLS2 or PLS1, depending on the number of\ntargets.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.8\n\nParameters\n----------\nn_components : int, default=2\n Number of components to keep. Should be in `[1, min(n_samples,\n n_features, n_targets)]`.\n\nscale : bool, default=True\n Whether to scale `X` and `Y`.\n\nalgorithm : {'nipals', 'svd'}, default='nipals'\n The algorithm used to estimate the first singular vectors of the\n cross-covariance matrix. 
'nipals' uses the power method while 'svd'\n will compute the whole SVD.\n\nmax_iter : int, default=500\n The maximum number of iterations of the power method when\n `algorithm='nipals'`. Ignored otherwise.\n\ntol : float, default=1e-06\n The tolerance used as convergence criteria in the power method: the\n algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n than `tol`, where `u` corresponds to the left singular vector.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. If False, these operations will be done inplace,\n modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the cross-covariance matrices of each\n iteration.\n\ny_weights_ : ndarray of shape (n_targets, n_components)\n The right singular vectors of the cross-covariance matrices of each\n iteration.\n\nx_loadings_ : ndarray of shape (n_features, n_components)\n The loadings of `X`.\n\ny_loadings_ : ndarray of shape (n_targets, n_components)\n The loadings of `Y`.\n\nx_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\ny_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\nx_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `X`.\n\ny_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `Y`.\n\ncoef_ : ndarray of shape (n_features, n_targets)\n The coefficients of the linear model such that `Y` is approximated as\n `Y = X @ coef_`.\n\nn_iter_ : list of shape (n_components,)\n Number of iterations of the power method, for each\n component. 
Empty if `algorithm='svd'`.\n\nExamples\n--------\n>>> from sklearn.cross_decomposition import PLSRegression\n>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]\n>>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n>>> pls2 = PLSRegression(n_components=2)\n>>> pls2.fit(X, Y)\nPLSRegression()\n>>> Y_pred = pls2.predict(X)" - }, - { - "name": "PLSCanonical", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of components to keep. Should be in `[1, min(n_samples, n_features, n_targets)]`." - }, - { - "name": "scale", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to scale `X` and `Y`." - }, - { - "name": "algorithm", - "type": "Literal['nipals', 'svd']", - "hasDefault": true, - "default": "'nipals'", - "limitation": null, - "ignored": false, - "docstring": "The algorithm used to estimate the first singular vectors of the cross-covariance matrix. 'nipals' uses the power method while 'svd' will compute the whole SVD." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "the maximum number of iterations of the power method when `algorithm='nipals'`. Ignored otherwise." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-06", - "limitation": null, - "ignored": false, - "docstring": "The tolerance used as convergence criteria in the power method: the algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less than `tol`, where `u` corresponds to the left singular vector." 
- }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy `X` and `Y` in fit before applying centering, and potentially scaling. If False, these operations will be done inplace, modifying both arrays." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Partial Least Squares transformer and regressor.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.8\n\nParameters\n----------\nn_components : int, default=2\n Number of components to keep. Should be in `[1, min(n_samples,\n n_features, n_targets)]`.\n\nscale : bool, default=True\n Whether to scale `X` and `Y`.\n\nalgorithm : {'nipals', 'svd'}, default='nipals'\n The algorithm used to estimate the first singular vectors of the\n cross-covariance matrix. 'nipals' uses the power method while 'svd'\n will compute the whole SVD.\n\nmax_iter : int, default=500\n the maximum number of iterations of the power method when\n `algorithm='nipals'`. Ignored otherwise.\n\ntol : float, default=1e-06\n The tolerance used as convergence criteria in the power method: the\n algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n than `tol`, where `u` corresponds to the left singular vector.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. 
If False, these operations will be done inplace,\n modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the cross-covariance matrices of each\n iteration.\n\ny_weights_ : ndarray of shape (n_targets, n_components)\n The right singular vectors of the cross-covariance matrices of each\n iteration.\n\nx_loadings_ : ndarray of shape (n_features, n_components)\n The loadings of `X`.\n\ny_loadings_ : ndarray of shape (n_targets, n_components)\n The loadings of `Y`.\n\nx_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\n .. deprecated:: 0.24\n `x_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\ny_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\n .. deprecated:: 0.24\n `y_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\nx_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `X`.\n\ny_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `Y`.\n\ncoef_ : ndarray of shape (n_features, n_targets)\n The coefficients of the linear model such that `Y` is approximated as\n `Y = X @ coef_`.\n\nn_iter_ : list of shape (n_components,)\n Number of iterations of the power method, for each\n component. 
Empty if `algorithm='svd'`.\n\nExamples\n--------\n>>> from sklearn.cross_decomposition import PLSCanonical\n>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]\n>>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n>>> plsca = PLSCanonical(n_components=2)\n>>> plsca.fit(X, Y)\nPLSCanonical()\n>>> X_c, Y_c = plsca.transform(X, Y)\n\nSee Also\n--------\nCCA\nPLSSVD" - }, - { - "name": "CCA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of components to keep. Should be in `[1, min(n_samples, n_features, n_targets)]`." - }, - { - "name": "scale", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to scale `X` and `Y`." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "the maximum number of iterations of the power method." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-06", - "limitation": null, - "ignored": false, - "docstring": "The tolerance used as convergence criteria in the power method: the algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less than `tol`, where `u` corresponds to the left singular vector." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy `X` and `Y` in fit before applying centering, and potentially scaling. If False, these operations will be done inplace, modifying both arrays." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Canonical Correlation Analysis, also known as \"Mode B\" PLS.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=2\n Number of components to keep. Should be in `[1, min(n_samples,\n n_features, n_targets)]`.\n\nscale : bool, default=True\n Whether to scale `X` and `Y`.\n\nmax_iter : int, default=500\n the maximum number of iterations of the power method.\n\ntol : float, default=1e-06\n The tolerance used as convergence criteria in the power method: the\n algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n than `tol`, where `u` corresponds to the left singular vector.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. If False, these operations will be done inplace,\n modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the cross-covariance matrices of each\n iteration.\n\ny_weights_ : ndarray of shape (n_targets, n_components)\n The right singular vectors of the cross-covariance matrices of each\n iteration.\n\nx_loadings_ : ndarray of shape (n_features, n_components)\n The loadings of `X`.\n\ny_loadings_ : ndarray of shape (n_targets, n_components)\n The loadings of `Y`.\n\nx_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\n .. deprecated:: 0.24\n `x_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\ny_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\n .. deprecated:: 0.24\n `y_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). 
You can just call `transform` on the training\n data instead.\n\nx_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `X`.\n\ny_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `Y`.\n\ncoef_ : ndarray of shape (n_features, n_targets)\n The coefficients of the linear model such that `Y` is approximated as\n `Y = X @ coef_`.\n\nn_iter_ : list of shape (n_components,)\n Number of iterations of the power method, for each\n component.\n\nExamples\n--------\n>>> from sklearn.cross_decomposition import CCA\n>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [3.,5.,4.]]\n>>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n>>> cca = CCA(n_components=1)\n>>> cca.fit(X, Y)\nCCA(n_components=1)\n>>> X_c, Y_c = cca.transform(X, Y)\n\nSee Also\n--------\nPLSCanonical\nPLSSVD" - }, - { - "name": "PLSSVD", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of components to keep. Should be in `[1, min(n_samples, n_features, n_targets)]`." - }, - { - "name": "scale", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to scale `X` and `Y`." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy `X` and `Y` in fit before applying centering, and potentially scaling. If False, these operations will be done inplace, modifying both arrays." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training samples." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit model to data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training samples.\n\nY : array-like of shape (n_samples,) or (n_samples, n_targets)\n Targets." - }, - { - "name": "x_scores_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "y_scores_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "x_mean_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "y_mean_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "x_std_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "y_std_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples to be transformed." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the dimensionality reduction.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Samples to be transformed.\n\nY : array-like of shape (n_samples,) or (n_samples, n_targets), default=None\n Targets.\n\nReturns\n-------\nout : array-like or tuple of array-like\n The transformed data `X_tranformed` if `Y` is not None,\n `(X_transformed, Y_transformed)` otherwise." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Learn and apply the dimensionality reduction.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training samples.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets), default=None\n Targets.\n\nReturns\n-------\nout : array-like or tuple of array-like\n The transformed data `X_tranformed` if `Y` is not None,\n `(X_transformed, Y_transformed)` otherwise." - } - ], - "docstring": "Partial Least Square SVD.\n\nThis transformer simply performs a SVD on the crosscovariance matrix X'Y.\nIt is able to project both the training data `X` and the targets `Y`. The\ntraining data X is projected on the left singular vectors, while the\ntargets are projected on the right singular vectors.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.8\n\nParameters\n----------\nn_components : int, default=2\n The number of components to keep. 
Should be in `[1,\n min(n_samples, n_features, n_targets)]`.\n\nscale : bool, default=True\n Whether to scale `X` and `Y`.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. If False, these operations will be done inplace,\n modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the SVD of the cross-covariance matrix.\n Used to project `X` in `transform`.\n\ny_weights_ : ndarray of (n_targets, n_components)\n The right singular vectors of the SVD of the cross-covariance matrix.\n Used to project `X` in `transform`.\n\nx_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\n .. deprecated:: 0.24\n `x_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\ny_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\n .. deprecated:: 0.24\n `y_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.cross_decomposition import PLSSVD\n>>> X = np.array([[0., 0., 1.],\n... [1., 0., 0.],\n... [2., 2., 2.],\n... [2., 5., 4.]])\n>>> Y = np.array([[0.1, -0.2],\n... [0.9, 1.1],\n... [6.2, 5.9],\n... 
[11.9, 12.3]])\n>>> pls = PLSSVD(n_components=2).fit(X, Y)\n>>> X_c, Y_c = pls.transform(X, Y)\n>>> X_c.shape, Y_c.shape\n((4, 2), (4, 2))\n\nSee Also\n--------\nPLSCanonical\nCCA" - } - ], - "functions": [ - { - "name": "_get_first_singular_vectors_power_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the first left and right singular vectors of X'Y.\n\nProvides an alternative to the svd(X'Y) and uses the power method instead.\nWith norm_y_weights to True and in mode A, this corresponds to the\nalgorithm section 11.3 of the Wegelin's review, except this starts at the\n\"update saliences\" part." - }, - { - "name": "_get_first_singular_vectors_svd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the first left and right singular vectors of X'Y.\n\nHere the whole SVD is computed." - }, - { - "name": "_center_scale_xy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Center X, Y and scale if the scale parameter==True\n\nReturns\n-------\n X, Y, x_mean, y_mean, x_std, y_std" - }, - { - "name": "_svd_flip_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Same as svd_flip but works on 1d arrays, and is inplace" - } - ] - }, - { - "name": "sklearn.cross_decomposition", - "imports": [ - "from _pls import PLSCanonical", - "from _pls import PLSRegression", - "from _pls import PLSSVD", - "from _pls import CCA" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.cross_decomposition.tests.test_pls", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_allclose", - "from sklearn.datasets import load_linnerud", - "from sklearn.cross_decomposition._pls import 
_center_scale_xy", - "from sklearn.cross_decomposition._pls import _get_first_singular_vectors_power_method", - "from sklearn.cross_decomposition._pls import _get_first_singular_vectors_svd", - "from sklearn.cross_decomposition._pls import _svd_flip_1d", - "from sklearn.cross_decomposition import CCA", - "from sklearn.cross_decomposition import PLSSVD", - "from sklearn.cross_decomposition import PLSRegression", - "from sklearn.cross_decomposition import PLSCanonical", - "from sklearn.datasets import make_regression", - "from sklearn.utils import check_random_state", - "from sklearn.utils.extmath import svd_flip", - "from sklearn.exceptions import ConvergenceWarning" - ], - "classes": [], - "functions": [ - { - "name": "assert_matrix_orthogonal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pls_canonical_basics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sanity_check_pls_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sanity_check_pls_regression_constant_column_Y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sanity_check_pls_canonical", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sanity_check_pls_canonical_random", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_convergence_fail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_attibutes_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_univariate_equivalence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_copy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_generate_test_scale_and_stability_datasets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate dataset for test_scale_and_stability" - }, - { - "name": "test_scale_and_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "scale=True is equivalent to scale=False on centered/scaled data\nThis allows to check numerical stability over platforms as well" - }, - { - "name": "test_n_components_bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components_bounds_pls_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scores_deprecations", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_norm_y_weights_deprecation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_and_std_deprecation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_singular_value_helpers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_component_equivalence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svd_flip_1d", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.cross_decomposition.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.datasets.setup", - "imports": [ - "import numpy", - "import os", - "import platform", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets._base", - "imports": [ - "import csv", - "import hashlib", - "import os", - "import shutil", - "from collections import namedtuple", - "from os import environ", - "from os import listdir", - "from os import makedirs", - "from os.path import dirname", - "from os.path import exists", - "from os.path import expanduser", - "from os.path import isdir", - "from os.path import join", - "from os.path import splitext", - "from utils import Bunch", - "from utils import check_random_state", - "from utils import check_pandas_support", - "from utils.validation import _deprecate_positional_args", - "import numpy as np", - "from urllib.request import urlretrieve", - "from externals._pilutil import imread" - ], - "classes": [], - "functions": [ - { - "name": "get_data_home", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The path to scikit-learn data directory. If `None`, the default path is `~/sklearn_learn_data`." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the path of the scikit-learn data dir.\n\nThis folder is used by some large dataset loaders to avoid downloading the\ndata several times.\n\nBy default the data dir is set to a folder named 'scikit_learn_data' in the\nuser home folder.\n\nAlternatively, it can be set by the 'SCIKIT_LEARN_DATA' environment\nvariable or programmatically by giving an explicit folder path. The '~'\nsymbol is expanded to the user home folder.\n\nIf the folder does not already exist, it is automatically created.\n\nParameters\n----------\ndata_home : str, default=None\n The path to scikit-learn data directory. If `None`, the default path\n is `~/sklearn_learn_data`." - }, - { - "name": "clear_data_home", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The path to scikit-learn data directory. If `None`, the default path is `~/sklearn_learn_data`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Delete all the content of the data home cache.\n\nParameters\n----------\ndata_home : str, default=None\n The path to scikit-learn data directory. If `None`, the default path\n is `~/sklearn_learn_data`." 
- }, - { - "name": "_convert_data_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "load_files", - "decorators": [], - "parameters": [ - { - "name": "container_path", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Path to the main folder holding one subfolder per category" - }, - { - "name": "description", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A paragraph describing the characteristic of the dataset: its source, reference, etc." - }, - { - "name": "categories", - "type": "List[str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If None (default), load all the categories. If not None, list of category names to load (other categories ignored)." - }, - { - "name": "load_content", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to load or not the content of the different files. If true a 'data' attribute containing the text information is present in the data structure returned. If not, a filenames attribute gives the path to the files." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to shuffle the data: might be important for models that make the assumption that the samples are independent and identically distributed (i.i.d.), such as stochastic gradient descent." - }, - { - "name": "encoding", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If None, do not try to decode the content of the files (e.g. for images or other non-text content). 
If not None, encoding to use to decode text files to Unicode if load_content is True." - }, - { - "name": "decode_error", - "type": "Literal['strict', 'ignore', 'replace']", - "hasDefault": true, - "default": "'strict'", - "limitation": null, - "ignored": false, - "docstring": "Instruction on what to do if a byte sequence is given to analyze that contains characters not of the given `encoding`. Passed as keyword argument 'errors' to bytes.decode." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Load text files with categories as subfolder names.\n\nIndividual samples are assumed to be files stored a two levels folder\nstructure such as the following:\n\n container_folder/\n category_1_folder/\n file_1.txt\n file_2.txt\n ...\n file_42.txt\n category_2_folder/\n file_43.txt\n file_44.txt\n ...\n\nThe folder names are used as supervised signal label names. The individual\nfile names are not important.\n\nThis function does not try to extract features into a numpy array or scipy\nsparse matrix. In addition, if load_content is false it does not try to\nload the files in memory.\n\nTo use text files in a scikit-learn classification or clustering algorithm,\nyou will need to use the :mod`~sklearn.feature_extraction.text` module to\nbuild a feature extraction transformer that suits your problem.\n\nIf you set load_content=True, you should also specify the encoding of the\ntext using the 'encoding' parameter. For many modern text files, 'utf-8'\nwill be the correct encoding. 
If you leave encoding equal to None, then the\ncontent will be made of bytes instead of Unicode, and you will not be able\nto use most functions in :mod:`~sklearn.feature_extraction.text`.\n\nSimilar feature extractors should be built for other kind of unstructured\ndata input such as images, audio, video, ...\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncontainer_path : str or unicode\n Path to the main folder holding one subfolder per category\n\ndescription : str or unicode, default=None\n A paragraph describing the characteristic of the dataset: its source,\n reference, etc.\n\ncategories : list of str, default=None\n If None (default), load all the categories. If not None, list of\n category names to load (other categories ignored).\n\nload_content : bool, default=True\n Whether to load or not the content of the different files. If true a\n 'data' attribute containing the text information is present in the data\n structure returned. If not, a filenames attribute gives the path to the\n files.\n\nshuffle : bool, default=True\n Whether or not to shuffle the data: might be important for models that\n make the assumption that the samples are independent and identically\n distributed (i.i.d.), such as stochastic gradient descent.\n\nencoding : str, default=None\n If None, do not try to decode the content of the files (e.g. for images\n or other non-text content). If not None, encoding to use to decode text\n files to Unicode if load_content is True.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. Passed as keyword\n argument 'errors' to bytes.decode.\n\nrandom_state : int, RandomState instance or None, default=0\n Determines random number generation for dataset shuffling. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : list of str\n Only present when `load_content=True`.\n The raw text data to learn.\n target : ndarray\n The target labels (integer index).\n target_names : list\n The names of target classes.\n DESCR : str\n The full description of the dataset.\n filenames: ndarray\n The filenames holding the dataset." - }, - { - "name": "load_data", - "decorators": [], - "parameters": [ - { - "name": "module_path", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The module path." - }, - { - "name": "data_file_name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of csv file to be loaded from module_path/data/data_file_name. For example 'wine_data.csv'." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Loads data from module_path/data/data_file_name.\n\nParameters\n----------\nmodule_path : string\n The module path.\n\ndata_file_name : string\n Name of csv file to be loaded from\n module_path/data/data_file_name. For example 'wine_data.csv'.\n\nReturns\n-------\ndata : Numpy array\n A 2D array with each row representing one sample and each column\n representing the features of a given sample.\n\ntarget : Numpy array\n A 1D array holding target variables for all the samples in `data.\n For example target[0] is the target varible for data[0].\n\ntarget_names : Numpy array\n A 1D array containing the names of the classifications. For example\n target_names[0] is the name of the target[0] class." 
- }, - { - "name": "load_wine", - "decorators": [], - "parameters": [ - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object." - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load and return the wine dataset (classification).\n\n.. versionadded:: 0.18\n\nThe wine dataset is a classic and very easy multi-class classification\ndataset.\n\n================= ==============\nClasses 3\nSamples per class [59,71,48]\nSamples total 178\nDimensionality 13\nFeatures real, positive\n================= ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (178, 13)\n The data matrix. 
If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (178,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n frame: DataFrame of shape (178, 14)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n\nThe copy of UCI ML Wine Data Set dataset is downloaded and modified to fit\nstandard format from:\nhttps://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\n\nExamples\n--------\nLet's say you are interested in the samples 10, 80, and 140, and want to\nknow their class name.\n\n>>> from sklearn.datasets import load_wine\n>>> data = load_wine()\n>>> data.target[[10, 80, 140]]\narray([0, 1, 2])\n>>> list(data.target_names)\n['class_0', 'class_1', 'class_2']" - }, - { - "name": "load_iris", - "decorators": [], - "parameters": [ - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.18" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. 
versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load and return the iris dataset (classification).\n\nThe iris dataset is a classic and very easy multi-class classification\ndataset.\n\n================= ==============\nClasses 3\nSamples per class 50\nSamples total 150\nDimensionality 4\nFeatures real, positive\n================= ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object. See\n below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (150, 4)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (150,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n frame: DataFrame of shape (150, 5)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n filename: str\n The path to the location of the data.\n\n .. versionadded:: 0.20\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n\nNotes\n-----\n .. 
versionchanged:: 0.20\n Fixed two wrong data points according to Fisher's paper.\n The new version is the same as in R, but not as in the UCI\n Machine Learning Repository.\n\nExamples\n--------\nLet's say you are interested in the samples 10, 25, and 50, and want to\nknow their class name.\n\n>>> from sklearn.datasets import load_iris\n>>> data = load_iris()\n>>> data.target[[10, 25, 50]]\narray([0, 0, 1])\n>>> list(data.target_names)\n['setosa', 'versicolor', 'virginica']" - }, - { - "name": "load_breast_cancer", - "decorators": [], - "parameters": [ - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.18" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load and return the breast cancer wisconsin dataset (classification).\n\nThe breast cancer dataset is a classic and very easy binary classification\ndataset.\n\n================= ==============\nClasses 2\nSamples per class 212(M),357(B)\nSamples total 569\nDimensionality 30\nFeatures real, positive\n================= ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. 
versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (569, 30)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (569,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n frame: DataFrame of shape (569, 31)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n filename: str\n The path to the location of the data.\n\n .. versionadded:: 0.20\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n\nThe copy of UCI ML Breast Cancer Wisconsin (Diagnostic) dataset is\ndownloaded from:\nhttps://goo.gl/U2Uwz2\n\nExamples\n--------\nLet's say you are interested in the samples 10, 50, and 85, and want to\nknow their class name.\n\n>>> from sklearn.datasets import load_breast_cancer\n>>> data = load_breast_cancer()\n>>> data.target[[10, 50, 85]]\narray([0, 1, 0])\n>>> list(data.target_names)\n['malignant', 'benign']" - }, - { - "name": "load_digits", - "decorators": [], - "parameters": [ - { - "name": "n_class", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The number of classes to return. Between 0 and 10." 
- }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.18" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load and return the digits dataset (classification).\n\nEach datapoint is a 8x8 image of a digit.\n\n================= ==============\nClasses 10\nSamples per class ~180\nSamples total 1797\nDimensionality 64\nFeatures integers 0-16\n================= ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_class : int, default=10\n The number of classes to return. Between 0 and 10.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. 
versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (1797, 64)\n The flattened data matrix. If `as_frame=True`, `data` will be\n a pandas DataFrame.\n target: {ndarray, Series} of shape (1797,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n\n .. versionadded:: 0.20\n\n frame: DataFrame of shape (1797, 65)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n images: {ndarray} of shape (1797, 8, 8)\n The raw image data.\n DESCR: str\n The full description of the dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n\nThis is a copy of the test set of the UCI ML hand-written digits datasets\nhttps://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits\n\nExamples\n--------\nTo load the data and visualize the images::\n\n >>> from sklearn.datasets import load_digits\n >>> digits = load_digits()\n >>> print(digits.data.shape)\n (1797, 64)\n >>> import matplotlib.pyplot as plt #doctest: +SKIP\n >>> plt.gray() #doctest: +SKIP\n >>> plt.matshow(digits.images[0]) #doctest: +SKIP\n >>> plt.show() #doctest: +SKIP" - }, - { - "name": "load_diabetes", - "decorators": [], - "parameters": [ - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. 
versionadded:: 0.18" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load and return the diabetes dataset (regression).\n\n============== ==================\nSamples total 442\nDimensionality 10\nFeatures real, -.2 < x < .2\nTargets integer 25 - 346\n============== ==================\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False.\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (442, 10)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (442,)\n The regression target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n frame: DataFrame of shape (442, 11)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. 
versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n data_filename: str\n The path to the location of the data.\n target_filename: str\n The path to the location of the target.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18" - }, - { - "name": "load_linnerud", - "decorators": [], - "parameters": [ - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.18" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric, string or categorical). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load and return the physical excercise linnerud dataset.\n\nThis dataset is suitable for multi-ouput regression tasks.\n\n============== ============================\nSamples total 20\nDimensionality 3 (for both data and target)\nFeatures integer\nTargets integer\n============== ============================\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string or categorical). 
The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (20, 3)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, dataframe} of shape (20, 3)\n The regression targets. If `as_frame=True`, `target` will be\n a pandas DataFrame.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of the target columns.\n frame: DataFrame of shape (20, 6)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n data_filename: str\n The path to the location of the data.\n target_filename: str\n The path to the location of the target.\n\n .. versionadded:: 0.20\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18" - }, - { - "name": "load_boston", - "decorators": [], - "parameters": [ - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load and return the boston house-prices dataset (regression).\n\n============== ==============\nSamples total 506\nDimensionality 13\nFeatures real, positive\nTargets real 5. 
- 50.\n============== ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (506, 13)\n The data matrix.\n target : ndarray of shape (506, )\n The regression target.\n filename : str\n The physical location of boston csv dataset.\n\n .. versionadded:: 0.20\n\n DESCR : str\n The full description of the dataset.\n feature_names : ndarray\n The names of features\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n\nNotes\n-----\n .. versionchanged:: 0.20\n Fixed a wrong data point at [445, 0].\n\nExamples\n--------\n>>> from sklearn.datasets import load_boston\n>>> X, y = load_boston(return_X_y=True)\n>>> print(X.shape)\n(506, 13)" - }, - { - "name": "load_sample_images", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Load sample images for image manipulation.\n\nLoads both, ``china`` and ``flower``.\n\nRead more in the :ref:`User Guide `.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n images : list of ndarray of shape (427, 640, 3)\n The two sample image.\n filenames : list\n The filenames for the images.\n DESCR : str\n The full description of the dataset.\n\nExamples\n--------\nTo load the data and visualize the images:\n\n>>> from sklearn.datasets import load_sample_images\n>>> dataset = load_sample_images() #doctest: +SKIP\n>>> len(dataset.images) #doctest: +SKIP\n2\n>>> first_img_data = dataset.images[0] #doctest: +SKIP\n>>> first_img_data.shape #doctest: +SKIP\n(427, 640, 3)\n>>> first_img_data.dtype #doctest: +SKIP\ndtype('uint8')" - }, - { - 
"name": "load_sample_image", - "decorators": [], - "parameters": [ - { - "name": "image_name", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The name of the sample image loaded" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Load the numpy array of a single sample image\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nimage_name : {`china.jpg`, `flower.jpg`}\n The name of the sample image loaded\n\nReturns\n-------\nimg : 3D array\n The image as a numpy array: height x width x color\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_sample_image\n>>> china = load_sample_image('china.jpg') # doctest: +SKIP\n>>> china.dtype # doctest: +SKIP\ndtype('uint8')\n>>> china.shape # doctest: +SKIP\n(427, 640, 3)\n>>> flower = load_sample_image('flower.jpg') # doctest: +SKIP\n>>> flower.dtype # doctest: +SKIP\ndtype('uint8')\n>>> flower.shape # doctest: +SKIP\n(427, 640, 3)" - }, - { - "name": "_pkl_filepath", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return filename for Python 3 pickles\n\nargs[-1] is expected to be the \".pkl\" filename. For compatibility with\nolder scikit-learn versions, a suffix is inserted before the extension.\n\n_pkl_filepath('/path/to/folder', 'filename.pkl') returns\n'/path/to/folder/filename_py3.pkl'" - }, - { - "name": "_sha256", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate the sha256 hash of the file at path." 
- }, - { - "name": "_fetch_remote", - "decorators": [], - "parameters": [ - { - "name": "remote", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Named tuple containing remote dataset meta information: url, filename and checksum" - }, - { - "name": "dirname", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Directory to save the file to." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Helper function to download a remote dataset into path\n\nFetch a dataset pointed by remote's url, save into path using remote's\nfilename and ensure its integrity based on the SHA256 Checksum of the\ndownloaded file.\n\nParameters\n----------\nremote : RemoteFileMetadata\n Named tuple containing remote dataset meta information: url, filename\n and checksum\n\ndirname : string\n Directory to save the file to.\n\nReturns\n-------\nfile_path: string\n Full path of the created file." - } - ] - }, - { - "name": "sklearn.datasets._california_housing", - "imports": [ - "from os.path import dirname", - "from os.path import exists", - "from os.path import join", - "from os import makedirs", - "from os import remove", - "import tarfile", - "import numpy as np", - "import logging", - "import joblib", - "from None import get_data_home", - "from _base import _convert_data_dataframe", - "from _base import _fetch_remote", - "from _base import _pkl_filepath", - "from _base import RemoteFileMetadata", - "from utils import Bunch", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "fetch_california_housing", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the datasets. 
By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data.data, data.target)`` instead of a Bunch object. .. versionadded:: 0.20" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric, string or categorical). The target is a pandas DataFrame or Series depending on the number of target_columns. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load the California housing dataset (regression).\n\n============== ==============\nSamples total 20640\nDimensionality 8\nFeatures real\nTarget real 0.15 - 5.\n============== ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\n\nreturn_X_y : bool, default=False.\n If True, returns ``(data.data, data.target)`` instead of a Bunch\n object.\n\n .. versionadded:: 0.20\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string or categorical). 
The target is\n a pandas DataFrame or Series depending on the number of target_columns.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray, shape (20640, 8)\n Each row corresponding to the 8 feature values in order.\n If ``as_frame`` is True, ``data`` is a pandas object.\n target : numpy array of shape (20640,)\n Each value corresponds to the average\n house value in units of 100,000.\n If ``as_frame`` is True, ``target`` is a pandas object.\n feature_names : list of length 8\n Array of ordered feature names used in the dataset.\n DESCR : string\n Description of the California housing dataset.\n frame : pandas DataFrame\n Only present when `as_frame=True`. DataFrame with ``data`` and\n ``target``.\n\n .. versionadded:: 0.23\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20\n\nNotes\n-----\n\nThis dataset consists of 20,640 samples and 9 features." - } - ] - }, - { - "name": "sklearn.datasets._covtype", - "imports": [ - "from gzip import GzipFile", - "import logging", - "from os.path import dirname", - "from os.path import exists", - "from os.path import join", - "from os import remove", - "from os import makedirs", - "import numpy as np", - "import joblib", - "from None import get_data_home", - "from _base import _convert_data_dataframe", - "from _base import _fetch_remote", - "from _base import RemoteFileMetadata", - "from utils import Bunch", - "from _base import _pkl_filepath", - "from utils import check_random_state", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "fetch_covtype", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the datasets. 
By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle dataset." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data.data, data.target)`` instead of a Bunch object. .. versionadded:: 0.20" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load the covertype dataset (classification).\n\nDownload it if necessary.\n\n================= ============\nClasses 7\nSamples total 581012\nDimensionality 54\nFeatures int\n================= ============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nshuffle : bool, default=False\n Whether to shuffle dataset.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data.data, data.target)`` instead of a Bunch\n object.\n\n .. versionadded:: 0.20\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is a pandas DataFrame or\n Series depending on the number of target columns. If `return_X_y` is\n True, then (`data`, `target`) will be pandas DataFrames or Series as\n described below.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (581012, 54)\n Each row corresponds to the 54 features in the dataset.\n target : ndarray of shape (581012,)\n Each value corresponds to one of\n the 7 forest covertypes with values\n ranging between 1 to 7.\n frame : dataframe of shape (581012, 53)\n Only present when `as_frame=True`. 
Contains `data` and `target`.\n DESCR : str\n Description of the forest covertype dataset.\n feature_names : list\n The names of the dataset columns.\n target_names: list\n The names of the target columns.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20" - } - ] - }, - { - "name": "sklearn.datasets._kddcup99", - "imports": [ - "import errno", - "from gzip import GzipFile", - "import logging", - "import os", - "from os.path import dirname", - "from os.path import exists", - "from os.path import join", - "import numpy as np", - "import joblib", - "from _base import _fetch_remote", - "from _base import _convert_data_dataframe", - "from None import get_data_home", - "from _base import RemoteFileMetadata", - "from utils import Bunch", - "from utils import check_random_state", - "from utils import shuffle as shuffle_method", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "fetch_kddcup99", - "decorators": [], - "parameters": [ - { - "name": "subset", - "type": "Literal['SA', 'SF', 'http', 'smtp']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "To return the corresponding classical subsets of kddcup 99. If None, return the entire kddcup 99 dataset." - }, - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. .. versionadded:: 0.19" - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle dataset." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling and for selection of abnormal samples if `subset='SA'`. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "percent10", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to load only 10 percent of the data." - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.20" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If `True`, returns a pandas Dataframe for the ``data`` and ``target`` objects in the `Bunch` returned object; `Bunch` return object will also have a ``frame`` member. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load the kddcup99 dataset (classification).\n\nDownload it if necessary.\n\n================= ====================================\nClasses 23\nSamples total 4898431\nDimensionality 41\nFeatures discrete (int) or continuous (float)\n================= ====================================\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18\n\nParameters\n----------\nsubset : {'SA', 'SF', 'http', 'smtp'}, default=None\n To return the corresponding classical subsets of kddcup 99.\n If None, return the entire kddcup 99 dataset.\n\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n .. versionadded:: 0.19\n\nshuffle : bool, default=False\n Whether to shuffle dataset.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling and for\n selection of abnormal samples if `subset='SA'`. Pass an int for\n reproducible output across multiple function calls.\n See :term:`Glossary `.\n\npercent10 : bool, default=True\n Whether to load only 10 percent of the data.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object. See\n below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.20\n\nas_frame : bool, default=False\n If `True`, returns a pandas Dataframe for the ``data`` and ``target``\n objects in the `Bunch` returned object; `Bunch` return object will also\n have a ``frame`` member.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (494021, 41)\n The data matrix to learn. If `as_frame=True`, `data` will be a\n pandas DataFrame.\n target : {ndarray, series} of shape (494021,)\n The regression target for each sample. If `as_frame=True`, `target`\n will be a pandas Series.\n frame : dataframe of shape (494021, 42)\n Only present when `as_frame=True`. 
Contains `data` and `target`.\n DESCR : str\n The full description of the dataset.\n feature_names : list\n The names of the dataset columns\n target_names: list\n The names of the target columns\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20" - }, - { - "name": "_fetch_brute_kddcup99", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "percent10", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to load only 10 percent of the data." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Load the kddcup99 dataset, downloading it if necessary.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. 
By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\npercent10 : bool, default=True\n Whether to load only 10 percent of the data.\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (494021, 41)\n Each row corresponds to the 41 features in the dataset.\n target : ndarray of shape (494021,)\n Each value corresponds to one of the 21 attack types or to the\n label 'normal.'.\n feature_names : list\n The names of the dataset columns\n target_names: list\n The names of the target columns\n DESCR : str\n Description of the kddcup99 dataset." - }, - { - "name": "_mkdirp", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ensure directory d exists (like mkdir -p on Unix)\nNo guarantee that the directory is writable." 
- } - ] - }, - { - "name": "sklearn.datasets._lfw", - "imports": [ - "from os import listdir", - "from os import makedirs", - "from os import remove", - "from os.path import dirname", - "from os.path import join", - "from os.path import exists", - "from os.path import isdir", - "import logging", - "import numpy as np", - "import joblib", - "from joblib import Memory", - "from _base import get_data_home", - "from _base import _fetch_remote", - "from _base import RemoteFileMetadata", - "from utils import Bunch", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import parse_version", - "import tarfile", - "from externals._pilutil import imread", - "from externals._pilutil import imresize" - ], - "classes": [], - "functions": [ - { - "name": "_check_fetch_lfw", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper function to download any missing LFW data" - }, - { - "name": "_load_imgs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Internally used to load images" - }, - { - "name": "_fetch_lfw_people", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform the actual data loading for the lfw people dataset\n\nThis operation is meant to be cached by a joblib wrapper." - }, - { - "name": "fetch_lfw_people", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "funneled", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Download and use the funneled variant of the dataset." 
- }, - { - "name": "resize", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Ratio used to resize the each face picture." - }, - { - "name": "min_faces_per_person", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The extracted dataset will only retain pictures of people that have at least `min_faces_per_person` different pictures." - }, - { - "name": "color", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Keep the 3 RGB channels instead of averaging them to a single gray level channel. If color is True the shape of the data has one more dimension than the shape with color = False." - }, - { - "name": "slice_", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Provide a custom 2D slice (height, width) to extract the 'interesting' part of the jpeg files and avoid use statistical correlation from the background" - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch object. See below for more information about the `dataset.data` and `dataset.target` object. .. 
versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load the Labeled Faces in the Wild (LFW) people dataset (classification).\n\nDownload it if necessary.\n\n================= =======================\nClasses 5749\nSamples total 13233\nDimensionality 5828\nFeatures real, between 0 and 255\n================= =======================\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\nfunneled : bool, default=True\n Download and use the funneled variant of the dataset.\n\nresize : float, default=0.5\n Ratio used to resize the each face picture.\n\nmin_faces_per_person : int, default=None\n The extracted dataset will only retain pictures of people that have at\n least `min_faces_per_person` different pictures.\n\ncolor : bool, default=False\n Keep the 3 RGB channels instead of averaging them to a single\n gray level channel. If color is True the shape of the data has\n one more dimension than the shape with color = False.\n\nslice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))\n Provide a custom 2D slice (height, width) to extract the\n 'interesting' part of the jpeg files and avoid use statistical\n correlation from the background\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch\n object. See below for more information about the `dataset.data` and\n `dataset.target` object.\n\n .. 
versionadded:: 0.20\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : numpy array of shape (13233, 2914)\n Each row corresponds to a ravelled face image\n of original size 62 x 47 pixels.\n Changing the ``slice_`` or resize parameters will change the\n shape of the output.\n images : numpy array of shape (13233, 62, 47)\n Each row is a face image corresponding to one of the 5749 people in\n the dataset. Changing the ``slice_``\n or resize parameters will change the shape of the output.\n target : numpy array of shape (13233,)\n Labels associated to each face image.\n Those labels range from 0-5748 and correspond to the person IDs.\n DESCR : string\n Description of the Labeled Faces in the Wild (LFW) dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20" - }, - { - "name": "_fetch_lfw_pairs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform the actual data loading for the LFW pairs dataset\n\nThis operation is meant to be cached by a joblib wrapper." - }, - { - "name": "fetch_lfw_pairs", - "decorators": [], - "parameters": [ - { - "name": "subset", - "type": "Literal['train', 'test', '10_folds']", - "hasDefault": true, - "default": "'train'", - "limitation": null, - "ignored": false, - "docstring": "Select the dataset to load: 'train' for the development training set, 'test' for the development test set, and '10_folds' for the official evaluation set that is meant to be used with a 10-folds cross validation." - }, - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." 
- }, - { - "name": "funneled", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Download and use the funneled variant of the dataset." - }, - { - "name": "resize", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Ratio used to resize the each face picture." - }, - { - "name": "color", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Keep the 3 RGB channels instead of averaging them to a single gray level channel. If color is True the shape of the data has one more dimension than the shape with color = False." - }, - { - "name": "slice_", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Provide a custom 2D slice (height, width) to extract the 'interesting' part of the jpeg files and avoid use statistical correlation from the background" - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Load the Labeled Faces in the Wild (LFW) pairs dataset (classification).\n\nDownload it if necessary.\n\n================= =======================\nClasses 2\nSamples total 13233\nDimensionality 5828\nFeatures real, between 0 and 255\n================= =======================\n\nIn the official `README.txt`_ this task is described as the\n\"Restricted\" task. As I am not sure as to implement the\n\"Unrestricted\" variant correctly, I left it as unsupported for now.\n\n .. 
_`README.txt`: http://vis-www.cs.umass.edu/lfw/README.txt\n\nThe original images are 250 x 250 pixels, but the default slice and resize\narguments reduce them to 62 x 47.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nsubset : {'train', 'test', '10_folds'}, default='train'\n Select the dataset to load: 'train' for the development training\n set, 'test' for the development test set, and '10_folds' for the\n official evaluation set that is meant to be used with a 10-folds\n cross validation.\n\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By\n default all scikit-learn data is stored in '~/scikit_learn_data'\n subfolders.\n\nfunneled : bool, default=True\n Download and use the funneled variant of the dataset.\n\nresize : float, default=0.5\n Ratio used to resize the each face picture.\n\ncolor : bool, default=False\n Keep the 3 RGB channels instead of averaging them to a single\n gray level channel. If color is True the shape of the data has\n one more dimension than the shape with color = False.\n\nslice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))\n Provide a custom 2D slice (height, width) to extract the\n 'interesting' part of the jpeg files and avoid use statistical\n correlation from the background\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (2200, 5828). Shape depends on ``subset``.\n Each row corresponds to 2 ravel'd face images\n of original size 62 x 47 pixels.\n Changing the ``slice_``, ``resize`` or ``subset`` parameters\n will change the shape of the output.\n pairs : ndarray of shape (2200, 2, 62, 47). 
Shape depends on ``subset``\n Each row has 2 face images corresponding\n to same or different person from the dataset\n containing 5749 people. Changing the ``slice_``,\n ``resize`` or ``subset`` parameters will change the shape of the\n output.\n target : numpy array of shape (2200,). Shape depends on ``subset``.\n Labels associated to each pair of images.\n The two label values being different persons or the same person.\n DESCR : string\n Description of the Labeled Faces in the Wild (LFW) dataset." - } - ] - }, - { - "name": "sklearn.datasets._olivetti_faces", - "imports": [ - "from os.path import dirname", - "from os.path import exists", - "from os.path import join", - "from os import makedirs", - "from os import remove", - "import numpy as np", - "from scipy.io.matlab import loadmat", - "import joblib", - "from None import get_data_home", - "from _base import _fetch_remote", - "from _base import RemoteFileMetadata", - "from _base import _pkl_filepath", - "from utils import check_random_state", - "from utils import Bunch", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "fetch_olivetti_faces", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True the order of the dataset is shuffled to avoid having images of the same person grouped." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling. 
Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns `(data, target)` instead of a `Bunch` object. See below for more information about the `data` and `target` object. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load the Olivetti faces data-set from AT&T (classification).\n\nDownload it if necessary.\n\n================= =====================\nClasses 40\nSamples total 400\nDimensionality 4096\nFeatures real, between 0 and 1\n================= =====================\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\nshuffle : bool, default=False\n If True the order of the dataset is shuffled to avoid having\n images of the same person grouped.\n\nrandom_state : int, RandomState instance or None, default=0\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns `(data, target)` instead of a `Bunch` object. See\n below for more information about the `data` and `target` object.\n\n .. 
versionadded:: 0.22\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data: ndarray, shape (400, 4096)\n Each row corresponds to a ravelled\n face image of original size 64 x 64 pixels.\n images : ndarray, shape (400, 64, 64)\n Each row is a face image\n corresponding to one of the 40 subjects of the dataset.\n target : ndarray, shape (400,)\n Labels associated to each face image.\n Those labels are ranging from 0-39 and correspond to the\n Subject IDs.\n DESCR : str\n Description of the modified Olivetti Faces Dataset.\n\n(data, target) : tuple if `return_X_y=True`\n .. versionadded:: 0.22" - } - ] - }, - { - "name": "sklearn.datasets._openml", - "imports": [ - "import gzip", - "import json", - "import os", - "import shutil", - "import hashlib", - "from os.path import join", - "from warnings import warn", - "from contextlib import closing", - "from functools import wraps", - "from typing import Callable", - "from typing import Optional", - "from typing import Dict", - "from typing import Tuple", - "from typing import List", - "from typing import Any", - "from typing import Union", - "import itertools", - "from collections.abc import Generator", - "from collections import OrderedDict", - "from functools import partial", - "from urllib.request import urlopen", - "from urllib.request import Request", - "import numpy as np", - "import scipy.sparse", - "from externals import _arff", - "from externals._arff import ArffSparseDataType", - "from externals._arff import ArffContainerType", - "from None import get_data_home", - "from urllib.error import HTTPError", - "from utils import Bunch", - "from utils import get_chunk_n_rows", - "from utils import _chunk_generator", - "from utils import check_pandas_support", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "OpenMLError", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "HTTP 412 is a specific OpenML error code, indicating a generic error" - } - ], - "functions": [ - { - "name": "_get_local_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_retry_with_clean_cache", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "If the first call to the decorated function fails, the local cached\nfile is removed, and the function is called again. If ``data_home`` is\n``None``, then the function is called once." - }, - { - "name": "_open_openml_url", - "decorators": [], - "parameters": [ - { - "name": "openml_path", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "OpenML URL that will be accessed. This will be prefixes with _OPENML_PREFIX" - }, - { - "name": "data_home", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Directory to which the files will be cached. If None, no caching will be applied." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Returns a resource from OpenML.org. Caches it to data_home if required.\n\nParameters\n----------\nopenml_path : str\n OpenML URL that will be accessed. This will be prefixes with\n _OPENML_PREFIX\n\ndata_home : str\n Directory to which the files will be cached. If None, no caching will\n be applied.\n\nReturns\n-------\nresult : stream\n A stream to the OpenML resource" - }, - { - "name": "_get_json_content_from_openml_api", - "decorators": [], - "parameters": [ - { - "name": "url", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The URL to load from. 
Should be an official OpenML endpoint" - }, - { - "name": "error_message", - "type": "Optional[str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The error message to raise if an acceptable OpenML error is thrown (acceptable error is, e.g., data id not found. Other errors, like 404's will throw the native error message)" - }, - { - "name": "data_home", - "type": "Optional[str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Location to cache the response. None if no cache is required." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Loads json data from the openml api\n\nParameters\n----------\nurl : str\n The URL to load from. Should be an official OpenML endpoint\n\nerror_message : str or None\n The error message to raise if an acceptable OpenML error is thrown\n (acceptable error is, e.g., data id not found. Other errors, like 404's\n will throw the native error message)\n\ndata_home : str or None\n Location to cache the response. None if no cache is required.\n\nReturns\n-------\njson_data : json\n the json result from the OpenML server if the call was successful.\n An exception otherwise." - }, - { - "name": "_split_sparse_columns", - "decorators": [], - "parameters": [ - { - "name": "arff_data", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A tuple of three lists of equal size; first list indicating the value, second the x coordinate and the third the y coordinate." - }, - { - "name": "include_columns", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A list of columns to include." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "obtains several columns from sparse arff representation. 
Additionally, the\ncolumn indices are re-labelled, given the columns that are not included.\n(e.g., when including [1, 2, 3], the columns will be relabelled to\n[0, 1, 2])\n\nParameters\n----------\narff_data : tuple\n A tuple of three lists of equal size; first list indicating the value,\n second the x coordinate and the third the y coordinate.\n\ninclude_columns : list\n A list of columns to include.\n\nReturns\n-------\narff_data_new : tuple\n Subset of arff data with only the include columns indicated by the\n include_columns argument." - }, - { - "name": "_sparse_data_to_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_convert_arff_data", - "decorators": [], - "parameters": [ - { - "name": "arff", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "As obtained from liac-arff object." - }, - { - "name": "col_slice_x", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The column indices that are sliced from the original array to return as X data" - }, - { - "name": "col_slice_y", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The column indices that are sliced from the original array to return as y data" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "converts the arff object into the appropriate matrix type (np.array or\nscipy.sparse.csr_matrix) based on the 'data part' (i.e., in the\nliac-arff dict, the object from the 'data' key)\n\nParameters\n----------\narff : dict\n As obtained from liac-arff object.\n\ncol_slice_x : list\n The column indices that are sliced from the original array to return\n as X data\n\ncol_slice_y : list\n The column indices that are sliced from the original array to return\n as y data\n\nReturns\n-------\nX : 
np.array or scipy.sparse.csr_matrix\ny : np.array" - }, - { - "name": "_feature_to_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Map feature to dtype for pandas DataFrame\n " - }, - { - "name": "_convert_arff_data_dataframe", - "decorators": [], - "parameters": [ - { - "name": "arff", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "As obtained from liac-arff object." - }, - { - "name": "columns", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Columns from dataframe to return." - }, - { - "name": "features_dict", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maps feature name to feature info from openml." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convert the ARFF object into a pandas DataFrame.\n\nParameters\n----------\narff : dict\n As obtained from liac-arff object.\n\ncolumns : list\n Columns from dataframe to return.\n\nfeatures_dict : dict\n Maps feature name to feature info from openml.\n\nReturns\n-------\nresult : tuple\n tuple with the resulting dataframe" - }, - { - "name": "_get_data_info_by_name", - "decorators": [], - "parameters": [ - { - "name": "name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "name of the dataset" - }, - { - "name": "version", - "type": "Union[str, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If version is an integer, the exact name/version will be obtained from OpenML. If version is a string (value: \"active\") it will take the first version from OpenML that is annotated as active. Any other string values except \"active\" are treated as integer." 
- }, - { - "name": "data_home", - "type": "Optional[str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Location to cache the response. None if no cache is required." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Utilizes the openml dataset listing api to find a dataset by\nname/version\nOpenML api function:\nhttps://www.openml.org/api_docs#!/data/get_data_list_data_name_data_name\n\nParameters\n----------\nname : str\n name of the dataset\n\nversion : int or str\n If version is an integer, the exact name/version will be obtained from\n OpenML. If version is a string (value: \"active\") it will take the first\n version from OpenML that is annotated as active. Any other string\n values except \"active\" are treated as integer.\n\ndata_home : str or None\n Location to cache the response. None if no cache is required.\n\nReturns\n-------\nfirst_dataset : json\n json representation of the first dataset object that adhired to the\n search criteria" - }, - { - "name": "_get_data_description_by_id", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_data_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_data_qualities", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_num_samples", - "decorators": [], - "parameters": [ - { - "name": "data_qualities", - "type": "List[Dict]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to retrieve the number of instances (samples) in the dataset." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get the number of samples from data qualities.\n\nParameters\n----------\ndata_qualities : list of dict\n Used to retrieve the number of instances (samples) in the dataset.\n\nReturns\n-------\nn_samples : int\n The number of samples in the dataset or -1 if data qualities are\n unavailable." - }, - { - "name": "_load_arff_response", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load arff data with url and parses arff response with parse_arff" - }, - { - "name": "_download_data_to_bunch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Download OpenML ARFF and convert to Bunch of data\n " - }, - { - "name": "_verify_target_data_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_valid_data_column_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fetch_openml", - "decorators": [], - "parameters": [ - { - "name": "name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "String identifier of the dataset. Note that OpenML can have multiple datasets with the same name." - }, - { - "name": "version", - "type": "Union[Literal['active'], int]", - "hasDefault": true, - "default": "'active'", - "limitation": null, - "ignored": false, - "docstring": "Version of the dataset. Can only be provided if also ``name`` is given. If 'active' the oldest version that's still active is used. Since there may be more than one active version of a dataset, and those versions may fundamentally be different from one another, setting an exact version is highly recommended." 
- }, - { - "name": "data_id", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "OpenML ID of the dataset. The most specific way of retrieving a dataset. If data_id is not given, name (and potential version) are used to obtain a dataset." - }, - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the data sets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "target_column", - "type": "Optional[Union[List, str]]", - "hasDefault": true, - "default": "'default-target'", - "limitation": null, - "ignored": false, - "docstring": "Specify the column name in the data to use as target. If 'default-target', the standard target column a stored on the server is used. If ``None``, all columns are returned as data and the target is ``None``. If list (of strings), all columns with these names are returned as multi-target (Note: not all scikit-learn classifiers can handle all types of multi-output combinations)" - }, - { - "name": "cache", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to cache downloaded datasets using joblib." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` objects." - }, - { - "name": "as_frame", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric, string or categorical). 
The target is a pandas DataFrame or Series depending on the number of target_columns. The Bunch will contain a ``frame`` attribute with the target and the data. If ``return_X_y`` is True, then ``(data, target)`` will be pandas DataFrames or Series as describe above. If as_frame is 'auto', the data and target will be converted to DataFrame or Series as if as_frame is set to True, unless the dataset is stored in sparse format. .. versionchanged:: 0.24 The default value of `as_frame` changed from `False` to `'auto'` in 0.24." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fetch dataset from openml by name or dataset id.\n\nDatasets are uniquely identified by either an integer ID or by a\ncombination of name and version (i.e. there might be multiple\nversions of the 'iris' dataset). Please give either name or data_id\n(not both). In case a name is given, a version can also be\nprovided.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\n.. note:: EXPERIMENTAL\n\n The API is experimental (particularly the return value structure),\n and might have small backward-incompatible changes without notice\n or warning in future releases.\n\nParameters\n----------\nname : str, default=None\n String identifier of the dataset. Note that OpenML can have multiple\n datasets with the same name.\n\nversion : int or 'active', default='active'\n Version of the dataset. Can only be provided if also ``name`` is given.\n If 'active' the oldest version that's still active is used. Since\n there may be more than one active version of a dataset, and those\n versions may fundamentally be different from one another, setting an\n exact version is highly recommended.\n\ndata_id : int, default=None\n OpenML ID of the dataset. The most specific way of retrieving a\n dataset. If data_id is not given, name (and potential version) are\n used to obtain a dataset.\n\ndata_home : str, default=None\n Specify another download and cache folder for the data sets. 
By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ntarget_column : str, list or None, default='default-target'\n Specify the column name in the data to use as target. If\n 'default-target', the standard target column a stored on the server\n is used. If ``None``, all columns are returned as data and the\n target is ``None``. If list (of strings), all columns with these names\n are returned as multi-target (Note: not all scikit-learn classifiers\n can handle all types of multi-output combinations)\n\ncache : bool, default=True\n Whether to cache downloaded datasets using joblib.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object. See\n below for more information about the `data` and `target` objects.\n\nas_frame : bool or 'auto', default='auto'\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string or categorical). The target is\n a pandas DataFrame or Series depending on the number of target_columns.\n The Bunch will contain a ``frame`` attribute with the target and the\n data. If ``return_X_y`` is True, then ``(data, target)`` will be pandas\n DataFrames or Series as describe above.\n\n If as_frame is 'auto', the data and target will be converted to\n DataFrame or Series as if as_frame is set to True, unless the dataset\n is stored in sparse format.\n\n .. versionchanged:: 0.24\n The default value of `as_frame` changed from `False` to `'auto'`\n in 0.24.\n\nReturns\n-------\n\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : np.array, scipy.sparse.csr_matrix of floats, or pandas DataFrame\n The feature matrix. Categorical features are encoded as ordinals.\n target : np.array, pandas Series or DataFrame\n The regression target or classification labels, if applicable.\n Dtype is float if numeric, and object if categorical. 
If\n ``as_frame`` is True, ``target`` is a pandas object.\n DESCR : str\n The full description of the dataset\n feature_names : list\n The names of the dataset columns\n target_names: list\n The names of the target columns\n\n .. versionadded:: 0.22\n\n categories : dict or None\n Maps each categorical feature name to a list of values, such\n that the value encoded as i is ith in the list. If ``as_frame``\n is True, this is None.\n details : dict\n More metadata from OpenML\n frame : pandas DataFrame\n Only present when `as_frame=True`. DataFrame with ``data`` and\n ``target``.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. note:: EXPERIMENTAL\n\n This interface is **experimental** and subsequent releases may\n change attributes without notice (although there should only be\n minor changes to ``data`` and ``target``).\n\n Missing values in the 'data' are represented as NaN's. Missing values\n in 'target' are represented as NaN's (numerical target) or None\n (categorical target)" - } - ] - }, - { - "name": "sklearn.datasets._rcv1", - "imports": [ - "import logging", - "from os import remove", - "from os import makedirs", - "from os.path import dirname", - "from os.path import exists", - "from os.path import join", - "from gzip import GzipFile", - "import numpy as np", - "import scipy.sparse as sp", - "import joblib", - "from None import get_data_home", - "from _base import _pkl_filepath", - "from _base import _fetch_remote", - "from _base import RemoteFileMetadata", - "from _svmlight_format_io import load_svmlight_files", - "from utils import shuffle as shuffle_", - "from utils import Bunch", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "fetch_rcv1", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the 
datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "subset", - "type": "Literal['train', 'test', 'all']", - "hasDefault": true, - "default": "'all'", - "limitation": null, - "ignored": false, - "docstring": "Select the dataset to load: 'train' for the training set (23149 samples), 'test' for the test set (781265 samples), 'all' for both, with the training samples first if shuffle is False. This follows the official LYRL2004 chronological split." - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle dataset." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch object. See below for more information about the `dataset.data` and `dataset.target` object. .. 
versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load the RCV1 multilabel dataset (classification).\n\nDownload it if necessary.\n\nVersion: RCV1-v2, vectors, full sets, topics multilabels.\n\n================= =====================\nClasses 103\nSamples total 804414\nDimensionality 47236\nFeatures real, between 0 and 1\n================= =====================\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\nsubset : {'train', 'test', 'all'}, default='all'\n Select the dataset to load: 'train' for the training set\n (23149 samples), 'test' for the test set (781265 samples),\n 'all' for both, with the training samples first if shuffle is False.\n This follows the official LYRL2004 chronological split.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nshuffle : bool, default=False\n Whether to shuffle dataset.\n\nreturn_X_y : bool, default=False\n If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch\n object. See below for more information about the `dataset.data` and\n `dataset.target` object.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : sparse matrix of shape (804414, 47236), dtype=np.float64\n The array has 0.16% of non zero values. 
Will be of CSR format.\n target : sparse matrix of shape (804414, 103), dtype=np.uint8\n Each sample has a value of 1 in its categories, and 0 in others.\n The array has 3.15% of non zero values. Will be of CSR format.\n sample_id : ndarray of shape (804414,), dtype=np.uint32,\n Identification number of each sample, as ordered in dataset.data.\n target_names : ndarray of shape (103,), dtype=object\n Names of each target (RCV1 topics), as ordered in dataset.target.\n DESCR : str\n Description of the RCV1 dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20" - }, - { - "name": "_inverse_permutation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inverse permutation p." - }, - { - "name": "_find_permutation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the permutation from a to b." - } - ] - }, - { - "name": "sklearn.datasets._samples_generator", - "imports": [ - "import numbers", - "import array", - "from collections.abc import Iterable", - "import numpy as np", - "from scipy import linalg", - "import scipy.sparse as sp", - "from preprocessing import MultiLabelBinarizer", - "from utils import check_array", - "from utils import check_random_state", - "from utils import shuffle as util_shuffle", - "from utils.random import sample_without_replacement", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "_generate_hypercube", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns distinct binary samples of length dimensions.\n " - }, - { - "name": "make_classification", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." 
- }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "20", - "limitation": null, - "ignored": false, - "docstring": "The total number of features. These comprise ``n_informative`` informative features, ``n_redundant`` redundant features, ``n_repeated`` duplicated features and ``n_features-n_informative-n_redundant-n_repeated`` useless features drawn at random." - }, - { - "name": "n_informative", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of informative features. Each class is composed of a number of gaussian clusters each located around the vertices of a hypercube in a subspace of dimension ``n_informative``. For each cluster, informative features are drawn independently from N(0, 1) and then randomly linearly combined within each cluster in order to add covariance. The clusters are then placed on the vertices of the hypercube." - }, - { - "name": "n_redundant", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of redundant features. These features are generated as random linear combinations of the informative features." - }, - { - "name": "n_repeated", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The number of duplicated features, drawn randomly from the informative and the redundant features." - }, - { - "name": "n_classes", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of classes (or labels) of the classification problem." - }, - { - "name": "n_clusters_per_class", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of clusters per class." 
- }, - { - "name": "weights", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The proportions of samples assigned to each class. If None, then classes are balanced. Note that if ``len(weights) == n_classes - 1``, then the last class weight is automatically inferred. More than ``n_samples`` samples may be returned if the sum of ``weights`` exceeds 1. Note that the actual class proportions will not exactly match ``weights`` when ``flip_y`` isn't 0." - }, - { - "name": "flip_y", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The fraction of samples whose class is assigned randomly. Larger values introduce noise in the labels and make the classification task harder. Note that the default setting flip_y > 0 might lead to less than ``n_classes`` in y in some cases." - }, - { - "name": "class_sep", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The factor multiplying the hypercube size. Larger values spread out the clusters/classes and make the classification task easier." - }, - { - "name": "hypercube", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, the clusters are put on the vertices of a hypercube. If False, the clusters are put on the vertices of a random polytope." - }, - { - "name": "shift", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Shift features by the specified value. If None, then features are shifted by a random value drawn in [-class_sep, class_sep]." - }, - { - "name": "scale", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Multiply features by the specified value. 
If None, then features are scaled by a random value drawn in [1, 100]. Note that scaling happens after shifting." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Shuffle the samples and the features." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a random n-class classification problem.\n\nThis initially creates clusters of points normally distributed (std=1)\nabout vertices of an ``n_informative``-dimensional hypercube with sides of\nlength ``2*class_sep`` and assigns an equal number of clusters to each\nclass. It introduces interdependence between these features and adds\nvarious types of further noise to the data.\n\nWithout shuffling, ``X`` horizontally stacks features in the following\norder: the primary ``n_informative`` features, followed by ``n_redundant``\nlinear combinations of the informative features, followed by ``n_repeated``\nduplicates, drawn randomly with replacement from the informative and\nredundant features. The remaining features are filled with random noise.\nThus, without shuffling, all useful features are contained in the columns\n``X[:, :n_informative + n_redundant + n_repeated]``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=20\n The total number of features. 
These comprise ``n_informative``\n informative features, ``n_redundant`` redundant features,\n ``n_repeated`` duplicated features and\n ``n_features-n_informative-n_redundant-n_repeated`` useless features\n drawn at random.\n\nn_informative : int, default=2\n The number of informative features. Each class is composed of a number\n of gaussian clusters each located around the vertices of a hypercube\n in a subspace of dimension ``n_informative``. For each cluster,\n informative features are drawn independently from N(0, 1) and then\n randomly linearly combined within each cluster in order to add\n covariance. The clusters are then placed on the vertices of the\n hypercube.\n\nn_redundant : int, default=2\n The number of redundant features. These features are generated as\n random linear combinations of the informative features.\n\nn_repeated : int, default=0\n The number of duplicated features, drawn randomly from the informative\n and the redundant features.\n\nn_classes : int, default=2\n The number of classes (or labels) of the classification problem.\n\nn_clusters_per_class : int, default=2\n The number of clusters per class.\n\nweights : array-like of shape (n_classes,) or (n_classes - 1,), default=None\n The proportions of samples assigned to each class. If None, then\n classes are balanced. Note that if ``len(weights) == n_classes - 1``,\n then the last class weight is automatically inferred.\n More than ``n_samples`` samples may be returned if the sum of\n ``weights`` exceeds 1. Note that the actual class proportions will\n not exactly match ``weights`` when ``flip_y`` isn't 0.\n\nflip_y : float, default=0.01\n The fraction of samples whose class is assigned randomly. Larger\n values introduce noise in the labels and make the classification\n task harder. Note that the default setting flip_y > 0 might lead\n to less than ``n_classes`` in y in some cases.\n\nclass_sep : float, default=1.0\n The factor multiplying the hypercube size. 
Larger values spread\n out the clusters/classes and make the classification task easier.\n\nhypercube : bool, default=True\n If True, the clusters are put on the vertices of a hypercube. If\n False, the clusters are put on the vertices of a random polytope.\n\nshift : float, ndarray of shape (n_features,) or None, default=0.0\n Shift features by the specified value. If None, then features\n are shifted by a random value drawn in [-class_sep, class_sep].\n\nscale : float, ndarray of shape (n_features,) or None, default=1.0\n Multiply features by the specified value. If None, then features\n are scaled by a random value drawn in [1, 100]. Note that scaling\n happens after shifting.\n\nshuffle : bool, default=True\n Shuffle the samples and the features.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels for class membership of each sample.\n\nNotes\n-----\nThe algorithm is adapted from Guyon [1] and was designed to generate\nthe \"Madelon\" dataset.\n\nReferences\n----------\n.. [1] I. Guyon, \"Design of experiments for the NIPS 2003 variable\n selection benchmark\", 2003.\n\nSee Also\n--------\nmake_blobs : Simplified variant.\nmake_multilabel_classification : Unrelated generator for multilabel tasks." - }, - { - "name": "make_multilabel_classification", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "20", - "limitation": null, - "ignored": false, - "docstring": "The total number of features." 
- }, - { - "name": "n_classes", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "The number of classes of the classification problem." - }, - { - "name": "n_labels", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The average number of labels per instance. More precisely, the number of labels per sample is drawn from a Poisson distribution with ``n_labels`` as its expected value, but samples are bounded (using rejection sampling) by ``n_classes``, and must be nonzero if ``allow_unlabeled`` is False." - }, - { - "name": "length", - "type": "int", - "hasDefault": true, - "default": "50", - "limitation": null, - "ignored": false, - "docstring": "The sum of the features (number of words if documents) is drawn from a Poisson distribution with this expected value." - }, - { - "name": "allow_unlabeled", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, some instances might not belong to any class." - }, - { - "name": "sparse", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, return a sparse feature matrix .. versionadded:: 0.17 parameter to allow *sparse* output." - }, - { - "name": "return_indicator", - "type": "Literal['dense', 'sparse']", - "hasDefault": true, - "default": "'dense'", - "limitation": null, - "ignored": false, - "docstring": "If ``'dense'`` return ``Y`` in the dense binary indicator format. If ``'sparse'`` return ``Y`` in the sparse binary indicator format. ``False`` returns a list of lists of labels." 
- }, - { - "name": "return_distributions", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, return the prior class probability and conditional probabilities of features given classes, from which the data was drawn." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a random multilabel classification problem.\n\nFor each sample, the generative process is:\n - pick the number of labels: n ~ Poisson(n_labels)\n - n times, choose a class c: c ~ Multinomial(theta)\n - pick the document length: k ~ Poisson(length)\n - k times, choose a word: w ~ Multinomial(theta_c)\n\nIn the above process, rejection sampling is used to make sure that\nn is never zero or more than `n_classes`, and that the document length\nis never zero. Likewise, we reject classes which have already been chosen.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=20\n The total number of features.\n\nn_classes : int, default=5\n The number of classes of the classification problem.\n\nn_labels : int, default=2\n The average number of labels per instance. 
More precisely, the number\n of labels per sample is drawn from a Poisson distribution with\n ``n_labels`` as its expected value, but samples are bounded (using\n rejection sampling) by ``n_classes``, and must be nonzero if\n ``allow_unlabeled`` is False.\n\nlength : int, default=50\n The sum of the features (number of words if documents) is drawn from\n a Poisson distribution with this expected value.\n\nallow_unlabeled : bool, default=True\n If ``True``, some instances might not belong to any class.\n\nsparse : bool, default=False\n If ``True``, return a sparse feature matrix\n\n .. versionadded:: 0.17\n parameter to allow *sparse* output.\n\nreturn_indicator : {'dense', 'sparse'} or False, default='dense'\n If ``'dense'`` return ``Y`` in the dense binary indicator format. If\n ``'sparse'`` return ``Y`` in the sparse binary indicator format.\n ``False`` returns a list of lists of labels.\n\nreturn_distributions : bool, default=False\n If ``True``, return the prior class probability and conditional\n probabilities of features given classes, from which the data was\n drawn.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The generated samples.\n\nY : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n The label sets. Sparse matrix should be of CSR format.\n\np_c : ndarray of shape (n_classes,)\n The probability of each class being drawn. Only returned if\n ``return_distributions=True``.\n\np_w_c : ndarray of shape (n_features, n_classes)\n The probability of each feature being drawn given each class.\n Only returned if ``return_distributions=True``." 
- }, - { - "name": "make_hastie_10_2", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "12000", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generates data for binary classification used in\nHastie et al. 2009, Example 10.2.\n\nThe ten features are standard independent Gaussian and\nthe target ``y`` is defined by::\n\n y[i] = 1 if np.sum(X[i] ** 2) > 9.34 else -1\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=12000\n The number of samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 10)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] T. Hastie, R. Tibshirani and J. Friedman, \"Elements of Statistical\n Learning Ed. 2\", Springer, 2009.\n\nSee Also\n--------\nmake_gaussian_quantiles : A generalization of this dataset approach." - }, - { - "name": "make_regression", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of features." 
- }, - { - "name": "n_informative", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The number of informative features, i.e., the number of features used to build the linear model used to generate the output." - }, - { - "name": "n_targets", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of regression targets, i.e., the dimension of the y output vector associated with a sample. By default, the output is a scalar." - }, - { - "name": "bias", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The bias term in the underlying linear model." - }, - { - "name": "effective_rank", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "if not None: The approximate number of singular vectors required to explain most of the input data by linear combinations. Using this kind of singular spectrum in the input allows the generator to reproduce the correlations often observed in practice. if None: The input set is well conditioned, centered and gaussian with unit variance." - }, - { - "name": "tail_strength", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The relative importance of the fat noisy tail of the singular values profile if `effective_rank` is not None. When a float, it should be between 0 and 1." - }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the gaussian noise applied to the output." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Shuffle the samples and the features." 
- }, - { - "name": "coef", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the coefficients of the underlying linear model are returned." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a random regression problem.\n\nThe input set can either be well conditioned (by default) or have a low\nrank-fat tail singular profile. See :func:`make_low_rank_matrix` for\nmore details.\n\nThe output is generated by applying a (potentially biased) random linear\nregression model with `n_informative` nonzero regressors to the previously\ngenerated input and some gaussian centered noise with some adjustable\nscale.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=100\n The number of features.\n\nn_informative : int, default=10\n The number of informative features, i.e., the number of features used\n to build the linear model used to generate the output.\n\nn_targets : int, default=1\n The number of regression targets, i.e., the dimension of the y output\n vector associated with a sample. By default, the output is a scalar.\n\nbias : float, default=0.0\n The bias term in the underlying linear model.\n\neffective_rank : int, default=None\n if not None:\n The approximate number of singular vectors required to explain most\n of the input data by linear combinations. 
Using this kind of\n singular spectrum in the input allows the generator to reproduce\n the correlations often observed in practice.\n if None:\n The input set is well conditioned, centered and gaussian with\n unit variance.\n\ntail_strength : float, default=0.5\n The relative importance of the fat noisy tail of the singular values\n profile if `effective_rank` is not None. When a float, it should be\n between 0 and 1.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\nshuffle : bool, default=True\n Shuffle the samples and the features.\n\ncoef : bool, default=False\n If True, the coefficients of the underlying linear model are returned.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The input samples.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n The output values.\n\ncoef : ndarray of shape (n_features,) or (n_features, n_targets)\n The coefficient of the underlying linear model. It is returned only if\n coef is True." - }, - { - "name": "make_circles", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "Union[Tuple[], int]", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "If int, it is the total number of points generated. For odd numbers, the inner circle will have one point more than the outer circle. If two-element tuple, number of points in outer circle and inner circle. .. versionchanged:: 0.23 Added two-element tuple." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle the samples." 
- }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Standard deviation of Gaussian noise added to the data." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling and noise. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "factor", - "type": "float", - "hasDefault": true, - "default": ".", - "limitation": null, - "ignored": false, - "docstring": "Scale factor between inner and outer circle in the range `(0, 1)`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make a large circle containing a smaller circle in 2d.\n\nA simple toy dataset to visualize clustering and classification\nalgorithms.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int or tuple of shape (2,), dtype=int, default=100\n If int, it is the total number of points generated.\n For odd numbers, the inner circle will have one point more than the\n outer circle.\n If two-element tuple, number of points in outer circle and inner\n circle.\n\n .. 
versionchanged:: 0.23\n Added two-element tuple.\n\nshuffle : bool, default=True\n Whether to shuffle the samples.\n\nnoise : float, default=None\n Standard deviation of Gaussian noise added to the data.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling and noise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nfactor : float, default=.8\n Scale factor between inner and outer circle in the range `(0, 1)`.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 2)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels (0 or 1) for class membership of each sample." - }, - { - "name": "make_moons", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "Union[Tuple[], int]", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "If int, the total number of points generated. If two-element tuple, number of points in each of two moons. .. versionchanged:: 0.23 Added two-element tuple." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle the samples." - }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Standard deviation of Gaussian noise added to the data." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling and noise. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make two interleaving half circles.\n\nA simple toy dataset to visualize clustering and classification\nalgorithms. Read more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int or tuple of shape (2,), dtype=int, default=100\n If int, the total number of points generated.\n If two-element tuple, number of points in each of two moons.\n\n .. versionchanged:: 0.23\n Added two-element tuple.\n\nshuffle : bool, default=True\n Whether to shuffle the samples.\n\nnoise : float, default=None\n Standard deviation of Gaussian noise added to the data.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling and noise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 2)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels (0 or 1) for class membership of each sample." - }, - { - "name": "make_blobs", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "Union[ArrayLike, int]", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "If int, it is the total number of points equally divided among clusters. If array-like, each element of the sequence indicates the number of samples per cluster. .. versionchanged:: v0.20 one can now pass an array-like to the ``n_samples`` parameter" - }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of features for each sample." - }, - { - "name": "centers", - "type": "Union[NDArray, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of centers to generate, or the fixed center locations. 
If n_samples is an int and centers is None, 3 centers are generated. If n_samples is array-like, centers must be either None or an array of length equal to the length of n_samples." - }, - { - "name": "cluster_std", - "type": "Union[ArrayLike, float]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the clusters." - }, - { - "name": "center_box", - "type": "Tuple[float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The bounding box for each cluster center when centers are generated at random." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Shuffle the samples." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "return_centers", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, then return the centers of each cluster .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate isotropic Gaussian blobs for clustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int or array-like, default=100\n If int, it is the total number of points equally divided among\n clusters.\n If array-like, each element of the sequence indicates\n the number of samples per cluster.\n\n .. 
versionchanged:: v0.20\n one can now pass an array-like to the ``n_samples`` parameter\n\nn_features : int, default=2\n The number of features for each sample.\n\ncenters : int or ndarray of shape (n_centers, n_features), default=None\n The number of centers to generate, or the fixed center locations.\n If n_samples is an int and centers is None, 3 centers are generated.\n If n_samples is array-like, centers must be\n either None or an array of length equal to the length of n_samples.\n\ncluster_std : float or array-like of float, default=1.0\n The standard deviation of the clusters.\n\ncenter_box : tuple of float (min, max), default=(-10.0, 10.0)\n The bounding box for each cluster center when centers are\n generated at random.\n\nshuffle : bool, default=True\n Shuffle the samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nreturn_centers : bool, default=False\n If True, then return the centers of each cluster\n\n .. versionadded:: 0.23\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels for cluster membership of each sample.\n\ncenters : ndarray of shape (n_centers, n_features)\n The centers of each cluster. Only returned if\n ``return_centers=True``.\n\nExamples\n--------\n>>> from sklearn.datasets import make_blobs\n>>> X, y = make_blobs(n_samples=10, centers=3, n_features=2,\n... random_state=0)\n>>> print(X.shape)\n(10, 2)\n>>> y\narray([0, 0, 1, 0, 2, 2, 2, 1, 1, 0])\n>>> X, y = make_blobs(n_samples=[3, 3, 4], centers=None, n_features=2,\n... random_state=0)\n>>> print(X.shape)\n(10, 2)\n>>> y\narray([0, 1, 2, 0, 2, 2, 2, 1, 1, 0])\n\nSee Also\n--------\nmake_classification : A more intricate variant." 
- }, - { - "name": "make_friedman1", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The number of features. Should be at least 5." - }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the gaussian noise applied to the output." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset noise. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate the \"Friedman #1\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are independent features uniformly distributed on the interval\n[0, 1]. The output `y` is created according to the formula::\n\n y(X) = 10 * sin(pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - 0.5) ** 2 + 10 * X[:, 3] + 5 * X[:, 4] + noise * N(0, 1).\n\nOut of the `n_features` features, only 5 are actually used to compute\n`y`. The remaining features are independent of `y`.\n\nThe number of features has to be >= 5.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=10\n The number of features. Should be at least 5.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset noise. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] J. Friedman, \"Multivariate adaptive regression splines\", The Annals\n of Statistics 19 (1), pages 1-67, 1991.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning 24,\n pages 123-140, 1996." - }, - { - "name": "make_friedman2", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." - }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the gaussian noise applied to the output." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset noise. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate the \"Friedman #2\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are 4 independent features uniformly distributed on the\nintervals::\n\n 0 <= X[:, 0] <= 100,\n 40 * pi <= X[:, 1] <= 560 * pi,\n 0 <= X[:, 2] <= 1,\n 1 <= X[:, 3] <= 11.\n\nThe output `y` is created according to the formula::\n\n y(X) = (X[:, 0] ** 2 + (X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) ** 2) ** 0.5 + noise * N(0, 1).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset noise. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 4)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] J. Friedman, \"Multivariate adaptive regression splines\", The Annals\n of Statistics 19 (1), pages 1-67, 1991.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning 24,\n pages 123-140, 1996." - }, - { - "name": "make_friedman3", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." - }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the gaussian noise applied to the output." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset noise. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate the \"Friedman #3\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are 4 independent features uniformly distributed on the\nintervals::\n\n 0 <= X[:, 0] <= 100,\n 40 * pi <= X[:, 1] <= 560 * pi,\n 0 <= X[:, 2] <= 1,\n 1 <= X[:, 3] <= 11.\n\nThe output `y` is created according to the formula::\n\n y(X) = arctan((X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) / X[:, 0]) + noise * N(0, 1).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset noise. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 4)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] J. Friedman, \"Multivariate adaptive regression splines\", The Annals\n of Statistics 19 (1), pages 1-67, 1991.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning 24,\n pages 123-140, 1996." - }, - { - "name": "make_low_rank_matrix", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." 
- }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of features." - }, - { - "name": "effective_rank", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The approximate number of singular vectors required to explain most of the data by linear combinations." - }, - { - "name": "tail_strength", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The relative importance of the fat noisy tail of the singular values profile. The value should be between 0 and 1." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a mostly low rank matrix with bell-shaped singular values.\n\nMost of the variance can be explained by a bell-shaped curve of width\neffective_rank: the low rank part of the singular values profile is::\n\n (1 - tail_strength) * exp(-1.0 * (i / effective_rank) ** 2)\n\nThe remaining singular values' tail is fat, decreasing as::\n\n tail_strength * exp(-0.1 * i / effective_rank).\n\nThe low rank part of the profile can be considered the structured\nsignal part of the data while the tail can be considered the noisy\npart of the data that cannot be summarized by a low number of linear\ncomponents (singular vectors).\n\nThis kind of singular profiles is often seen in practice, for instance:\n - gray level pictures of faces\n - TF-IDF vectors of text documents crawled from the web\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=100\n The number of features.\n\neffective_rank : int, default=10\n The approximate number of singular vectors required to explain most of\n the data by linear combinations.\n\ntail_strength : float, default=0.5\n The relative importance of the fat noisy tail of the singular values\n profile. The value should be between 0 and 1.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The matrix." 
- }, - { - "name": "make_sparse_coded_signal", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to generate" - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components in the dictionary" - }, - { - "name": "n_features", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of features of the dataset to generate" - }, - { - "name": "n_nonzero_coefs", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of active (non-zero) coefficients in each sample" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a signal as a sparse combination of dictionary elements.\n\nReturns a matrix Y = DX, such as D is (n_features, n_components),\nX is (n_components, n_samples) and each column of X has exactly\nn_nonzero_coefs non-zero elements.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int\n Number of samples to generate\n\nn_components : int\n Number of components in the dictionary\n\nn_features : int\n Number of features of the dataset to generate\n\nn_nonzero_coefs : int\n Number of active (non-zero) coefficients in each sample\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ndata : ndarray of shape (n_features, n_samples)\n The encoded signal (Y).\n\ndictionary : ndarray of shape (n_features, n_components)\n The dictionary with normalized components (D).\n\ncode : ndarray of shape (n_components, n_samples)\n The sparse code such that each column of this matrix has exactly\n n_nonzero_coefs non-zero items (X)." - }, - { - "name": "make_sparse_uncorrelated", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of samples." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The number of features." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a random regression problem with sparse uncorrelated design.\n\nThis dataset is described in Celeux et al [1]. as::\n\n X ~ N(0, 1)\n y(X) = X[:, 0] + 2 * X[:, 1] - 2 * X[:, 2] - 1.5 * X[:, 3]\n\nOnly the first 4 features are informative. The remaining features are\nuseless.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=10\n The number of features.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] G. Celeux, M. El Anbari, J.-M. Marin, C. P. Robert,\n \"Regularization in regression: comparing Bayesian and frequentist\n methods in a poorly informative situation\", 2009." - }, - { - "name": "make_spd_matrix", - "decorators": [], - "parameters": [ - { - "name": "n_dim", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The matrix dimension." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a random symmetric, positive-definite matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_dim : int\n The matrix dimension.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_dim, n_dim)\n The random symmetric, positive-definite matrix.\n\nSee Also\n--------\nmake_sparse_spd_matrix" - }, - { - "name": "make_sparse_spd_matrix", - "decorators": [], - "parameters": [ - { - "name": "dim", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The size of the random matrix to generate." 
- }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The probability that a coefficient is zero (see notes). Larger values enforce more sparsity. The value should be in the range 0 and 1." - }, - { - "name": "norm_diag", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to normalize the output matrix to make the leading diagonal elements all 1" - }, - { - "name": "smallest_coef", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The value of the smallest coefficient between 0 and 1." - }, - { - "name": "largest_coef", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The value of the largest coefficient between 0 and 1." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Generate a sparse symmetric definite positive matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndim : int, default=1\n The size of the random matrix to generate.\n\nalpha : float, default=0.95\n The probability that a coefficient is zero (see notes). Larger values\n enforce more sparsity. 
The value should be in the range 0 and 1.\n\nnorm_diag : bool, default=False\n Whether to normalize the output matrix to make the leading diagonal\n elements all 1\n\nsmallest_coef : float, default=0.1\n The value of the smallest coefficient between 0 and 1.\n\nlargest_coef : float, default=0.9\n The value of the largest coefficient between 0 and 1.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nprec : sparse matrix of shape (dim, dim)\n The generated matrix.\n\nNotes\n-----\nThe sparsity is actually imposed on the cholesky factor of the matrix.\nThus alpha does not translate directly into the filling fraction of\nthe matrix itself.\n\nSee Also\n--------\nmake_spd_matrix" - }, - { - "name": "make_swiss_roll", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of sample points on the S curve." - }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the gaussian noise." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a swiss roll dataset.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of sample points on the S curve.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 3)\n The points.\n\nt : ndarray of shape (n_samples,)\n The univariate position of the sample according to the main dimension\n of the points in the manifold.\n\nNotes\n-----\nThe algorithm is from Marsland [1].\n\nReferences\n----------\n.. [1] S. Marsland, \"Machine Learning: An Algorithmic Perspective\",\n Chapter 10, 2009.\n http://seat.massey.ac.nz/personal/s.r.marsland/Code/10/lle.py" - }, - { - "name": "make_s_curve", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of sample points on the S curve." - }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the gaussian noise." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate an S curve dataset.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of sample points on the S curve.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 3)\n The points.\n\nt : ndarray of shape (n_samples,)\n The univariate position of the sample according to the main dimension\n of the points in the manifold." - }, - { - "name": "make_gaussian_quantiles", - "decorators": [], - "parameters": [ - { - "name": "mean", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The mean of the multi-dimensional normal distribution. If None then use the origin (0, 0, ...)." - }, - { - "name": "cov", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The covariance matrix will be this value times the unit matrix. This dataset only produces symmetric normal distributions." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The total number of points equally divided among classes." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The number of features for each sample." 
- }, - { - "name": "n_classes", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The number of classes" - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Shuffle the samples." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate isotropic Gaussian and label samples by quantile.\n\nThis classification dataset is constructed by taking a multi-dimensional\nstandard normal distribution and defining classes separated by nested\nconcentric multi-dimensional spheres such that roughly equal numbers of\nsamples are in each class (quantiles of the :math:`\\chi^2` distribution).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nmean : ndarray of shape (n_features,), default=None\n The mean of the multi-dimensional normal distribution.\n If None then use the origin (0, 0, ...).\n\ncov : float, default=1.0\n The covariance matrix will be this value times the unit matrix. This\n dataset only produces symmetric normal distributions.\n\nn_samples : int, default=100\n The total number of points equally divided among classes.\n\nn_features : int, default=2\n The number of features for each sample.\n\nn_classes : int, default=3\n The number of classes\n\nshuffle : bool, default=True\n Shuffle the samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels for quantile membership of each sample.\n\nNotes\n-----\nThe dataset is from Zhu et al [1].\n\nReferences\n----------\n.. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009." - }, - { - "name": "_shuffle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "make_biclusters", - "decorators": [], - "parameters": [ - { - "name": "shape", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The shape of the result." - }, - { - "name": "n_clusters", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of biclusters." - }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the gaussian noise." - }, - { - "name": "minval", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Minimum value of a bicluster." - }, - { - "name": "maxval", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum value of a bicluster." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Shuffle the samples." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. 
Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate an array with constant block diagonal structure for\nbiclustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nshape : iterable of shape (n_rows, n_cols)\n The shape of the result.\n\nn_clusters : int\n The number of biclusters.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise.\n\nminval : int, default=10\n Minimum value of a bicluster.\n\nmaxval : int, default=100\n Maximum value of a bicluster.\n\nshuffle : bool, default=True\n Shuffle the samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape `shape`\n The generated array.\n\nrows : ndarray of shape (n_clusters, X.shape[0])\n The indicators for cluster membership of each row.\n\ncols : ndarray of shape (n_clusters, X.shape[1])\n The indicators for cluster membership of each column.\n\nReferences\n----------\n\n.. [1] Dhillon, I. S. (2001, August). Co-clustering documents and\n words using bipartite spectral graph partitioning. In Proceedings\n of the seventh ACM SIGKDD international conference on Knowledge\n discovery and data mining (pp. 269-274). ACM.\n\nSee Also\n--------\nmake_checkerboard" - }, - { - "name": "make_checkerboard", - "decorators": [], - "parameters": [ - { - "name": "shape", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The shape of the result." - }, - { - "name": "n_clusters", - "type": "Union[ArrayLike, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of row and column clusters." 
- }, - { - "name": "noise", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The standard deviation of the gaussian noise." - }, - { - "name": "minval", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Minimum value of a bicluster." - }, - { - "name": "maxval", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum value of a bicluster." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Shuffle the samples." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate an array with block checkerboard structure for\nbiclustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nshape : tuple of shape (n_rows, n_cols)\n The shape of the result.\n\nn_clusters : int or array-like or shape (n_row_clusters, n_column_clusters)\n The number of row and column clusters.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise.\n\nminval : int, default=10\n Minimum value of a bicluster.\n\nmaxval : int, default=100\n Maximum value of a bicluster.\n\nshuffle : bool, default=True\n Shuffle the samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape `shape`\n The generated array.\n\nrows : ndarray of shape (n_clusters, X.shape[0])\n The indicators for cluster membership of each row.\n\ncols : ndarray of shape (n_clusters, X.shape[1])\n The indicators for cluster membership of each column.\n\n\nReferences\n----------\n\n.. [1] Kluger, Y., Basri, R., Chang, J. T., & Gerstein, M. (2003).\n Spectral biclustering of microarray data: coclustering genes\n and conditions. Genome research, 13(4), 703-716.\n\nSee Also\n--------\nmake_biclusters" - } - ] - }, - { - "name": "sklearn.datasets._species_distributions", - "imports": [ - "from io import BytesIO", - "from os import makedirs", - "from os import remove", - "from os.path import exists", - "import logging", - "import numpy as np", - "import joblib", - "from None import get_data_home", - "from _base import _fetch_remote", - "from _base import RemoteFileMetadata", - "from utils import Bunch", - "from utils.validation import _deprecate_positional_args", - "from _base import _pkl_filepath" - ], - "classes": [], - "functions": [ - { - "name": "_load_coverage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load a coverage file from an open file object.\n\nThis will return a numpy array of the given dtype" - }, - { - "name": "_load_csv", - "decorators": [], - "parameters": [ - { - "name": "F", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "CSV file open in byte mode." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Load csv file.\n\nParameters\n----------\nF : file object\n CSV file open in byte mode.\n\nReturns\n-------\nrec : np.ndarray\n record array representing the data" - }, - { - "name": "construct_grids", - "decorators": [], - "parameters": [ - { - "name": "batch", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The object returned by :func:`fetch_species_distributions`" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Construct the map grid from the batch object\n\nParameters\n----------\nbatch : Batch object\n The object returned by :func:`fetch_species_distributions`\n\nReturns\n-------\n(xgrid, ygrid) : 1-D arrays\n The grid corresponding to the values in batch.coverages" - }, - { - "name": "fetch_species_distributions", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Loader for species distribution dataset from Phillips et. al. (2006)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. 
By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n coverages : array, shape = [14, 1592, 1212]\n These represent the 14 features measured\n at each point of the map grid.\n The latitude/longitude values for the grid are discussed below.\n Missing data is represented by the value -9999.\n train : record array, shape = (1624,)\n The training points for the data. Each point has three fields:\n\n - train['species'] is the species name\n - train['dd long'] is the longitude, in degrees\n - train['dd lat'] is the latitude, in degrees\n test : record array, shape = (620,)\n The test points for the data. Same format as the training data.\n Nx, Ny : integers\n The number of longitudes (x) and latitudes (y) in the grid\n x_left_lower_corner, y_left_lower_corner : floats\n The (x,y) position of the lower-left corner, in degrees\n grid_size : float\n The spacing between points of the grid, in degrees\n\nReferences\n----------\n\n* `\"Maximum entropy modeling of species geographic distributions\"\n `_\n S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,\n 190:231-259, 2006.\n\nNotes\n-----\n\nThis dataset represents the geographic distribution of species.\nThe dataset is provided by Phillips et. al. (2006).\n\nThe two species are:\n\n- `\"Bradypus variegatus\"\n `_ ,\n the Brown-throated Sloth.\n\n- `\"Microryzomys minutus\"\n `_ ,\n also known as the Forest Small Rice Rat, a rodent that lives in Peru,\n Colombia, Ecuador, Peru, and Venezuela.\n\n- For an example of using this dataset with scikit-learn, see\n :ref:`examples/applications/plot_species_distribution_modeling.py\n `." 
- } - ] - }, - { - "name": "sklearn.datasets._svmlight_format_io", - "imports": [ - "from contextlib import closing", - "import io", - "import os.path", - "import numpy as np", - "import scipy.sparse as sp", - "from None import __version__", - "from utils import check_array", - "from utils import IS_PYPY", - "from utils.validation import _deprecate_positional_args", - "from _svmlight_format_fast import _load_svmlight_file", - "import gzip", - "from bz2 import BZ2File" - ], - "classes": [], - "functions": [ - { - "name": "_load_svmlight_file", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "load_svmlight_file", - "decorators": [], - "parameters": [ - { - "name": "f", - "type": "Union[int, str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "(Path to) a file to load. If a path ends in \".gz\" or \".bz2\", it will be uncompressed on the fly. If an integer is passed, it is assumed to be a file descriptor. A file-like or file descriptor will not be closed by this function. A file-like object must be opened in binary mode." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features to use. If None, it will be inferred. This argument is useful to load several files that are subsets of a bigger sliced dataset: each subset might not have examples of every feature, hence the inferred shape might vary from one slice to another. n_features is only required if ``offset`` or ``length`` are passed a non-default value." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Data type of dataset to be loaded. This will be the data type of the output numpy arrays ``X`` and ``y``." 
- }, - { - "name": "multilabel", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Samples may have several labels each (see https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)" - }, - { - "name": "zero_based", - "type": "Union[Literal[\"auto\"], bool]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "Whether column indices in f are zero-based (True) or one-based (False). If column indices are one-based, they are transformed to zero-based to match Python/NumPy conventions. If set to \"auto\", a heuristic check is applied to determine this from the file contents. Both kinds of files occur \"in the wild\", but they are unfortunately not self-identifying. Using \"auto\" or True should always be safe when no ``offset`` or ``length`` is passed. If ``offset`` or ``length`` are passed, the \"auto\" mode falls back to ``zero_based=True`` to avoid having the heuristic check yield inconsistent results on different segments of the file." - }, - { - "name": "query_id", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the query_id array for each file." - }, - { - "name": "offset", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Ignore the offset first bytes by seeking forward, then discarding the following bytes up until the next new line character." - }, - { - "name": "length", - "type": "int", - "hasDefault": true, - "default": "-1", - "limitation": null, - "ignored": false, - "docstring": "If strictly positive, stop reading any new line of data once the position in the file has reached the (offset + length) bytes threshold." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load datasets in the svmlight / libsvm format into sparse CSR matrix\n\nThis format is a text-based format, with one sample per line. It does\nnot store zero valued features hence is suitable for sparse dataset.\n\nThe first element of each line can be used to store a target variable\nto predict.\n\nThis format is used as the default format for both svmlight and the\nlibsvm command line programs.\n\nParsing a text based source can be expensive. When working on\nrepeatedly on the same dataset, it is recommended to wrap this\nloader with joblib.Memory.cache to store a memmapped backup of the\nCSR results of the first call and benefit from the near instantaneous\nloading of memmapped structures for the subsequent calls.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.\n\nThis implementation is written in Cython and is reasonably fast.\nHowever, a faster API-compatible loader is also available at:\n\n https://github.com/mblondel/svmlight-loader\n\nParameters\n----------\nf : str, file-like or int\n (Path to) a file to load. If a path ends in \".gz\" or \".bz2\", it will\n be uncompressed on the fly. If an integer is passed, it is assumed to\n be a file descriptor. A file-like or file descriptor will not be closed\n by this function. A file-like object must be opened in binary mode.\n\nn_features : int, default=None\n The number of features to use. If None, it will be inferred. 
This\n argument is useful to load several files that are subsets of a\n bigger sliced dataset: each subset might not have examples of\n every feature, hence the inferred shape might vary from one\n slice to another.\n n_features is only required if ``offset`` or ``length`` are passed a\n non-default value.\n\ndtype : numpy data type, default=np.float64\n Data type of dataset to be loaded. This will be the data type of the\n output numpy arrays ``X`` and ``y``.\n\nmultilabel : bool, default=False\n Samples may have several labels each (see\n https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\nzero_based : bool or \"auto\", default=\"auto\"\n Whether column indices in f are zero-based (True) or one-based\n (False). If column indices are one-based, they are transformed to\n zero-based to match Python/NumPy conventions.\n If set to \"auto\", a heuristic check is applied to determine this from\n the file contents. Both kinds of files occur \"in the wild\", but they\n are unfortunately not self-identifying. Using \"auto\" or True should\n always be safe when no ``offset`` or ``length`` is passed.\n If ``offset`` or ``length`` are passed, the \"auto\" mode falls back\n to ``zero_based=True`` to avoid having the heuristic check yield\n inconsistent results on different segments of the file.\n\nquery_id : bool, default=False\n If True, will return the query_id array for each file.\n\noffset : int, default=0\n Ignore the offset first bytes by seeking forward, then\n discarding the following bytes up until the next new line\n character.\n\nlength : int, default=-1\n If strictly positive, stop reading any new line of data once the\n position in the file has reached the (offset + length) bytes threshold.\n\nReturns\n-------\nX : scipy.sparse matrix of shape (n_samples, n_features)\n\ny : ndarray of shape (n_samples,), or, in the multilabel a list of\n tuples of length n_samples.\n\nquery_id : array of shape (n_samples,)\n query_id for each sample. 
Only returned when query_id is set to\n True.\n\nSee Also\n--------\nload_svmlight_files : Similar function for loading multiple files in this\n format, enforcing the same number of features/columns on all of them.\n\nExamples\n--------\nTo use joblib.Memory to cache the svmlight file::\n\n from joblib import Memory\n from .datasets import load_svmlight_file\n mem = Memory(\"./mycache\")\n\n @mem.cache\n def get_data():\n data = load_svmlight_file(\"mysvmlightfile\")\n return data[0], data[1]\n\n X, y = get_data()" - }, - { - "name": "_gen_open", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_open_and_load", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "load_svmlight_files", - "decorators": [], - "parameters": [ - { - "name": "files", - "type": "Union[int, ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "(Paths of) files to load. If a path ends in \".gz\" or \".bz2\", it will be uncompressed on the fly. If an integer is passed, it is assumed to be a file descriptor. File-likes and file descriptors will not be closed by this function. File-like objects must be opened in binary mode." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features to use. If None, it will be inferred from the maximum column index occurring in any of the files. This can be set to a higher value than the actual number of features in any of the input files, but setting it to a lower value will cause an exception to be raised." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Data type of dataset to be loaded. 
This will be the data type of the output numpy arrays ``X`` and ``y``." - }, - { - "name": "multilabel", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Samples may have several labels each (see https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)" - }, - { - "name": "zero_based", - "type": "Union[Literal[\"auto\"], bool]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "Whether column indices in f are zero-based (True) or one-based (False). If column indices are one-based, they are transformed to zero-based to match Python/NumPy conventions. If set to \"auto\", a heuristic check is applied to determine this from the file contents. Both kinds of files occur \"in the wild\", but they are unfortunately not self-identifying. Using \"auto\" or True should always be safe when no offset or length is passed. If offset or length are passed, the \"auto\" mode falls back to zero_based=True to avoid having the heuristic check yield inconsistent results on different segments of the file." - }, - { - "name": "query_id", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the query_id array for each file." - }, - { - "name": "offset", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Ignore the offset first bytes by seeking forward, then discarding the following bytes up until the next new line character." - }, - { - "name": "length", - "type": "int", - "hasDefault": true, - "default": "-1", - "limitation": null, - "ignored": false, - "docstring": "If strictly positive, stop reading any new line of data once the position in the file has reached the (offset + length) bytes threshold." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load dataset from multiple files in SVMlight format\n\nThis function is equivalent to mapping load_svmlight_file over a list of\nfiles, except that the results are concatenated into a single, flat list\nand the samples vectors are constrained to all have the same number of\nfeatures.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.\n\nParameters\n----------\nfiles : array-like, dtype=str, file-like or int\n (Paths of) files to load. If a path ends in \".gz\" or \".bz2\", it will\n be uncompressed on the fly. If an integer is passed, it is assumed to\n be a file descriptor. File-likes and file descriptors will not be\n closed by this function. File-like objects must be opened in binary\n mode.\n\nn_features : int, default=None\n The number of features to use. If None, it will be inferred from the\n maximum column index occurring in any of the files.\n\n This can be set to a higher value than the actual number of features\n in any of the input files, but setting it to a lower value will cause\n an exception to be raised.\n\ndtype : numpy data type, default=np.float64\n Data type of dataset to be loaded. This will be the data type of the\n output numpy arrays ``X`` and ``y``.\n\nmultilabel : bool, default=False\n Samples may have several labels each (see\n https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\nzero_based : bool or \"auto\", default=\"auto\"\n Whether column indices in f are zero-based (True) or one-based\n (False). 
If column indices are one-based, they are transformed to\n zero-based to match Python/NumPy conventions.\n If set to \"auto\", a heuristic check is applied to determine this from\n the file contents. Both kinds of files occur \"in the wild\", but they\n are unfortunately not self-identifying. Using \"auto\" or True should\n always be safe when no offset or length is passed.\n If offset or length are passed, the \"auto\" mode falls back\n to zero_based=True to avoid having the heuristic check yield\n inconsistent results on different segments of the file.\n\nquery_id : bool, default=False\n If True, will return the query_id array for each file.\n\noffset : int, default=0\n Ignore the offset first bytes by seeking forward, then\n discarding the following bytes up until the next new line\n character.\n\nlength : int, default=-1\n If strictly positive, stop reading any new line of data once the\n position in the file has reached the (offset + length) bytes threshold.\n\nReturns\n-------\n[X1, y1, ..., Xn, yn]\nwhere each (Xi, yi) pair is the result from load_svmlight_file(files[i]).\n\nIf query_id is set to True, this will return instead [X1, y1, q1,\n..., Xn, yn, qn] where (Xi, yi, qi) is the result from\nload_svmlight_file(files[i])\n\nNotes\n-----\nWhen fitting a model to a matrix X_train and evaluating it against a\nmatrix X_test, it is essential that X_train and X_test have the same\nnumber of features (X_train.shape[1] == X_test.shape[1]). 
This may not\nbe the case if you load the files individually with load_svmlight_file.\n\nSee Also\n--------\nload_svmlight_file" - }, - { - "name": "_dump_svmlight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "dump_svmlight_file", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. Class labels must be an integer or float, or array-like objects of integer or float for multilabel classifications." - }, - { - "name": "f", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If string, specifies the path that will contain the data. If file-like, data will be written to f. f should be opened in binary mode." - }, - { - "name": "zero_based", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether column indices should be written zero-based (True) or one-based (False)." - }, - { - "name": "comment", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Comment to insert at the top of the file. This should be either a Unicode string, which will be encoded as UTF-8, or an ASCII byte string. If a comment is given, then it will be preceded by one that identifies the file as having been dumped by scikit-learn. Note that not all tools grok comments in SVMlight files." 
- }, - { - "name": "query_id", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array containing pairwise preference constraints (qid in svmlight format)." - }, - { - "name": "multilabel", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Samples may have several labels each (see https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html) .. versionadded:: 0.17 parameter *multilabel* to support multilabel datasets." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Dump the dataset in svmlight / libsvm file format.\n\nThis format is a text-based format, with one sample per line. It does\nnot store zero valued features hence is suitable for sparse dataset.\n\nThe first element of each line can be used to store a target variable\nto predict.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : {array-like, sparse matrix}, shape = [n_samples (, n_labels)]\n Target values. Class labels must be an\n integer or float, or array-like objects of integer or float for\n multilabel classifications.\n\nf : string or file-like in binary mode\n If string, specifies the path that will contain the data.\n If file-like, data will be written to f. f should be opened in binary\n mode.\n\nzero_based : boolean, default=True\n Whether column indices should be written zero-based (True) or one-based\n (False).\n\ncomment : string, default=None\n Comment to insert at the top of the file. This should be either a\n Unicode string, which will be encoded as UTF-8, or an ASCII byte\n string.\n If a comment is given, then it will be preceded by one that identifies\n the file as having been dumped by scikit-learn. 
Note that not all\n tools grok comments in SVMlight files.\n\nquery_id : array-like of shape (n_samples,), default=None\n Array containing pairwise preference constraints (qid in svmlight\n format).\n\nmultilabel : boolean, default=False\n Samples may have several labels each (see\n https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\n .. versionadded:: 0.17\n parameter *multilabel* to support multilabel datasets." - } - ] - }, - { - "name": "sklearn.datasets._twenty_newsgroups", - "imports": [ - "import os", - "from os.path import dirname", - "from os.path import join", - "import logging", - "import tarfile", - "import pickle", - "import shutil", - "import re", - "import codecs", - "import numpy as np", - "import scipy.sparse as sp", - "import joblib", - "from None import get_data_home", - "from None import load_files", - "from _base import _convert_data_dataframe", - "from _base import _pkl_filepath", - "from _base import _fetch_remote", - "from _base import RemoteFileMetadata", - "from feature_extraction.text import CountVectorizer", - "from None import preprocessing", - "from utils import check_random_state", - "from utils import Bunch", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "_download_20newsgroups", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Download the 20 newsgroups data and stored it as a zipped pickle." - }, - { - "name": "strip_newsgroup_header", - "decorators": [], - "parameters": [ - { - "name": "text", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The text from which to remove the signature block." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Given text in \"news\" format, strip the headers, by removing everything\nbefore the first blank line.\n\nParameters\n----------\ntext : str\n The text from which to remove the signature block." - }, - { - "name": "strip_newsgroup_quoting", - "decorators": [], - "parameters": [ - { - "name": "text", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The text from which to remove the signature block." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Given text in \"news\" format, strip lines beginning with the quote\ncharacters > or |, plus lines that often introduce a quoted section\n(for example, because they contain the string 'writes:'.)\n\nParameters\n----------\ntext : str\n The text from which to remove the signature block." - }, - { - "name": "strip_newsgroup_footer", - "decorators": [], - "parameters": [ - { - "name": "text", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The text from which to remove the signature block." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Given text in \"news\" format, attempt to remove a signature block.\n\nAs a rough heuristic, we assume that signatures are set apart by either\na blank line or a line made of hyphens, and that it is the last such line\nin the file (disregarding blank lines at the end).\n\nParameters\n----------\ntext : str\n The text from which to remove the signature block." - }, - { - "name": "fetch_20newsgroups", - "decorators": [], - "parameters": [ - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify a download and cache folder for the datasets. If None, all scikit-learn data is stored in '~/scikit_learn_data' subfolders." 
- }, - { - "name": "subset", - "type": "Literal['train', 'test', 'all']", - "hasDefault": true, - "default": "'train'", - "limitation": null, - "ignored": false, - "docstring": "Select the dataset to load: 'train' for the training set, 'test' for the test set, 'all' for both, with shuffled ordering." - }, - { - "name": "categories", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If None (default), load all the categories. If not None, list of category names to load (other categories ignored)." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to shuffle the data: might be important for models that make the assumption that the samples are independent and identically distributed (i.i.d.), such as stochastic gradient descent." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "remove", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "May contain any subset of ('headers', 'footers', 'quotes'). Each of these are kinds of text that will be detected and removed from the newsgroup posts, preventing classifiers from overfitting on metadata. 'headers' removes newsgroup headers, 'footers' removes blocks at the ends of posts that look like signatures, and 'quotes' removes lines that appear to be quoting another post. 'headers' follows an exact standard; the other filters are not always correct." 
- }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise an IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns `(data.data, data.target)` instead of a Bunch object. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load the filenames and data from the 20 newsgroups dataset (classification).\n\nDownload it if necessary.\n\n================= ==========\nClasses 20\nSamples total 18846\nDimensionality 1\nFeatures text\n================= ==========\n\nRead more in the :ref:`User Guide <20newsgroups_dataset>`.\n\nParameters\n----------\ndata_home : str, default=None\n Specify a download and cache folder for the datasets. If None,\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\nsubset : {'train', 'test', 'all'}, default='train'\n Select the dataset to load: 'train' for the training set, 'test'\n for the test set, 'all' for both, with shuffled ordering.\n\ncategories : array-like, dtype=str or unicode, default=None\n If None (default), load all the categories.\n If not None, list of category names to load (other categories\n ignored).\n\nshuffle : bool, default=True\n Whether or not to shuffle the data: might be important for models that\n make the assumption that the samples are independent and identically\n distributed (i.i.d.), such as stochastic gradient descent.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nremove : tuple, default=()\n May contain any subset of ('headers', 'footers', 'quotes'). Each of\n these are kinds of text that will be detected and removed from the\n newsgroup posts, preventing classifiers from overfitting on\n metadata.\n\n 'headers' removes newsgroup headers, 'footers' removes blocks at the\n ends of posts that look like signatures, and 'quotes' removes lines\n that appear to be quoting another post.\n\n 'headers' follows an exact standard; the other filters are not always\n correct.\n\ndownload_if_missing : bool, default=True\n If False, raise an IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns `(data.data, data.target)` instead of a Bunch\n object.\n\n .. versionadded:: 0.22\n\nReturns\n-------\nbunch : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : list of shape (n_samples,)\n The data list to learn.\n target: ndarray of shape (n_samples,)\n The target labels.\n filenames: list of shape (n_samples,)\n The path to the location of the data.\n DESCR: str\n The full description of the dataset.\n target_names: list of shape (n_classes,)\n The names of target classes.\n\n(data, target) : tuple if `return_X_y=True`\n .. versionadded:: 0.22" - }, - { - "name": "fetch_20newsgroups_vectorized", - "decorators": [], - "parameters": [ - { - "name": "subset", - "type": "Literal['train', 'test', 'all']", - "hasDefault": true, - "default": "'train'", - "limitation": null, - "ignored": false, - "docstring": "Select the dataset to load: 'train' for the training set, 'test' for the test set, 'all' for both, with shuffled ordering." 
- }, - { - "name": "remove", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "May contain any subset of ('headers', 'footers', 'quotes'). Each of these are kinds of text that will be detected and removed from the newsgroup posts, preventing classifiers from overfitting on metadata. 'headers' removes newsgroup headers, 'footers' removes blocks at the ends of posts that look like signatures, and 'quotes' removes lines that appear to be quoting another post." - }, - { - "name": "data_home", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specify an download and cache folder for the datasets. If None, all scikit-learn data is stored in '~/scikit_learn_data' subfolders." - }, - { - "name": "download_if_missing", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, raise an IOError if the data is not locally available instead of trying to download the data from the source site." - }, - { - "name": "return_X_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns ``(data.data, data.target)`` instead of a Bunch object. .. versionadded:: 0.20" - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, normalizes each document's feature vector to unit norm using :func:`sklearn.preprocessing.normalize`. .. versionadded:: 0.22" - }, - { - "name": "as_frame", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric, string, or categorical). The target is a pandas DataFrame or Series depending on the number of `target_columns`. 
.. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load and vectorize the 20 newsgroups dataset (classification).\n\nDownload it if necessary.\n\nThis is a convenience function; the transformation is done using the\ndefault settings for\n:class:`~sklearn.feature_extraction.text.CountVectorizer`. For more\nadvanced usage (stopword filtering, n-gram extraction, etc.), combine\nfetch_20newsgroups with a custom\n:class:`~sklearn.feature_extraction.text.CountVectorizer`,\n:class:`~sklearn.feature_extraction.text.HashingVectorizer`,\n:class:`~sklearn.feature_extraction.text.TfidfTransformer` or\n:class:`~sklearn.feature_extraction.text.TfidfVectorizer`.\n\nThe resulting counts are normalized using\n:func:`sklearn.preprocessing.normalize` unless normalize is set to False.\n\n================= ==========\nClasses 20\nSamples total 18846\nDimensionality 130107\nFeatures real\n================= ==========\n\nRead more in the :ref:`User Guide <20newsgroups_dataset>`.\n\nParameters\n----------\nsubset : {'train', 'test', 'all'}, default='train'\n Select the dataset to load: 'train' for the training set, 'test'\n for the test set, 'all' for both, with shuffled ordering.\n\nremove : tuple, default=()\n May contain any subset of ('headers', 'footers', 'quotes'). Each of\n these are kinds of text that will be detected and removed from the\n newsgroup posts, preventing classifiers from overfitting on\n metadata.\n\n 'headers' removes newsgroup headers, 'footers' removes blocks at the\n ends of posts that look like signatures, and 'quotes' removes lines\n that appear to be quoting another post.\n\ndata_home : str, default=None\n Specify an download and cache folder for the datasets. 
If None,\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise an IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data.data, data.target)`` instead of a Bunch\n object.\n\n .. versionadded:: 0.20\n\nnormalize : bool, default=True\n If True, normalizes each document's feature vector to unit norm using\n :func:`sklearn.preprocessing.normalize`.\n\n .. versionadded:: 0.22\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string, or categorical). The target is\n a pandas DataFrame or Series depending on the number of\n `target_columns`.\n\n .. versionadded:: 0.24\n\nReturns\n-------\nbunch : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data: {sparse matrix, dataframe} of shape (n_samples, n_features)\n The input data matrix. If ``as_frame`` is `True`, ``data`` is\n a pandas DataFrame with sparse columns.\n target: {ndarray, series} of shape (n_samples,)\n The target labels. If ``as_frame`` is `True`, ``target`` is a\n pandas Series.\n target_names: list of shape (n_classes,)\n The names of target classes.\n DESCR: str\n The full description of the dataset.\n frame: dataframe of shape (n_samples, n_features + 1)\n Only present when `as_frame=True`. Pandas DataFrame with ``data``\n and ``target``.\n\n .. versionadded:: 0.24\n\n(data, target) : tuple if ``return_X_y`` is True\n `data` and `target` would be of the format defined in the `Bunch`\n description above.\n\n .. 
versionadded:: 0.20" - } - ] - }, - { - "name": "sklearn.datasets", - "imports": [ - "from _base import load_breast_cancer", - "from _base import load_boston", - "from _base import load_diabetes", - "from _base import load_digits", - "from _base import load_files", - "from _base import load_iris", - "from _base import load_linnerud", - "from _base import load_sample_images", - "from _base import load_sample_image", - "from _base import load_wine", - "from _base import get_data_home", - "from _base import clear_data_home", - "from _covtype import fetch_covtype", - "from _kddcup99 import fetch_kddcup99", - "from _lfw import fetch_lfw_pairs", - "from _lfw import fetch_lfw_people", - "from _twenty_newsgroups import fetch_20newsgroups", - "from _twenty_newsgroups import fetch_20newsgroups_vectorized", - "from _openml import fetch_openml", - "from _samples_generator import make_classification", - "from _samples_generator import make_multilabel_classification", - "from _samples_generator import make_hastie_10_2", - "from _samples_generator import make_regression", - "from _samples_generator import make_blobs", - "from _samples_generator import make_moons", - "from _samples_generator import make_circles", - "from _samples_generator import make_friedman1", - "from _samples_generator import make_friedman2", - "from _samples_generator import make_friedman3", - "from _samples_generator import make_low_rank_matrix", - "from _samples_generator import make_sparse_coded_signal", - "from _samples_generator import make_sparse_uncorrelated", - "from _samples_generator import make_spd_matrix", - "from _samples_generator import make_swiss_roll", - "from _samples_generator import make_s_curve", - "from _samples_generator import make_sparse_spd_matrix", - "from _samples_generator import make_gaussian_quantiles", - "from _samples_generator import make_biclusters", - "from _samples_generator import make_checkerboard", - "from _svmlight_format_io import load_svmlight_file", - "from 
_svmlight_format_io import load_svmlight_files", - "from _svmlight_format_io import dump_svmlight_file", - "from _olivetti_faces import fetch_olivetti_faces", - "from _species_distributions import fetch_species_distributions", - "from _california_housing import fetch_california_housing", - "from _rcv1 import fetch_rcv1" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.datasets.tests.conftest", - "imports": [ - "import builtins", - "import pytest" - ], - "classes": [], - "functions": [ - { - "name": "hide_available_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Pretend pandas was not installed. " - } - ] - }, - { - "name": "sklearn.datasets.tests.test_20news", - "imports": [ - "from functools import partial", - "from unittest.mock import patch", - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "from sklearn.datasets.tests.test_common import check_as_frame", - "from sklearn.datasets.tests.test_common import check_pandas_dependency_message", - "from sklearn.datasets.tests.test_common import check_return_X_y", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.preprocessing import normalize" - ], - "classes": [], - "functions": [ - { - "name": "test_20news", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_20news_length_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Checks the length consistencies within the bunch\n\nThis is a non-regression test for a bug present in 0.16.1." 
- }, - { - "name": "test_20news_vectorized", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_20news_normalization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_20news_as_frame", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_as_frame_no_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_outdated_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_base", - "imports": [ - "import os", - "import shutil", - "import tempfile", - "import warnings", - "from pickle import loads", - "from pickle import dumps", - "from functools import partial", - "import pytest", - "import numpy as np", - "from sklearn.datasets import get_data_home", - "from sklearn.datasets import clear_data_home", - "from sklearn.datasets import load_files", - "from sklearn.datasets import load_sample_images", - "from sklearn.datasets import load_sample_image", - "from sklearn.datasets import load_digits", - "from sklearn.datasets import load_diabetes", - "from sklearn.datasets import load_linnerud", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import load_breast_cancer", - "from sklearn.datasets import load_boston", - "from sklearn.datasets import load_wine", - "from sklearn.utils import Bunch", - "from sklearn.datasets.tests.test_common import check_as_frame", - "from sklearn.externals._pilutil import pillow_installed", - "from sklearn.utils import IS_PYPY" - ], - "classes": [], - "functions": [ - { - "name": "_remove_dir", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", 
- "docstring": null - }, - { - "name": "data_home", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "load_files_root", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_category_dir_1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_category_dir_2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_data_home", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_default_empty_load_files", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_default_load_files", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_files_w_categories_desc_and_encoding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_files_wo_load_content", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_sample_images", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_sample_image", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_missing_sample_image_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_loader", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_toy_dataset_frame_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_loads_dumps_bunch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bunch_pickle_generated_with_0_16_and_read_with_0_17", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bunch_dir", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_california_housing", - "imports": [ - "import pytest", - "from sklearn.datasets.tests.test_common import check_return_X_y", - "from functools import partial" - ], - "classes": [], - "functions": [ - { - "name": "test_fetch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_asframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pandas_dependency_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_common", - "imports": [ - "import inspect", - "import os", - "import pytest", - "import numpy as np", - "import sklearn.datasets", - "import PIL", - "import pandas" - ], - "classes": [], - "functions": [ - { - "name": "is_pillow_installed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pandas_dependency_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_return_X_y", 
- "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_as_frame", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_skip_network_tests", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_generate_func_supporting_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_common_check_return_X_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_common_check_as_frame", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_common_check_pandas_dependency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_covtype", - "imports": [ - "from functools import partial", - "import pytest", - "from sklearn.datasets.tests.test_common import check_return_X_y" - ], - "classes": [], - "functions": [ - { - "name": "test_fetch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_asframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pandas_dependency_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_kddcup99", - "imports": [ - "from functools import partial", - "import pytest", - "from sklearn.datasets.tests.test_common import check_as_frame", - "from sklearn.datasets.tests.test_common import 
check_pandas_dependency_message", - "from sklearn.datasets.tests.test_common import check_return_X_y" - ], - "classes": [], - "functions": [ - { - "name": "test_fetch_kddcup99_percent10", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_kddcup99_return_X_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_kddcup99_as_frame", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_kddcup99_shuffle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pandas_dependency_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_lfw", - "imports": [ - "import random", - "import os", - "import shutil", - "import tempfile", - "import numpy as np", - "import pytest", - "from functools import partial", - "from sklearn.externals._pilutil import pillow_installed", - "from sklearn.externals._pilutil import imsave", - "from sklearn.datasets import fetch_lfw_pairs", - "from sklearn.datasets import fetch_lfw_people", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import SkipTest", - "from sklearn.datasets.tests.test_common import check_return_X_y" - ], - "classes": [], - "functions": [ - { - "name": "setup_module", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test fixture run once and common to all tests of this module" - }, - { - "name": "teardown_module", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test fixture (clean up) run once after all tests of this 
module" - }, - { - "name": "test_load_empty_lfw_people", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_fake_lfw_people", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_fake_lfw_people_too_restrictive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_empty_lfw_pairs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_fake_lfw_pairs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_olivetti_faces", - "imports": [ - "import numpy as np", - "from sklearn.utils import Bunch", - "from sklearn.datasets.tests.test_common import check_return_X_y", - "from sklearn.utils._testing import assert_array_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_olivetti_faces", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_openml", - "imports": [ - "import gzip", - "import json", - "import numpy as np", - "import os", - "import re", - "import scipy.sparse", - "import sklearn", - "import pytest", - "from sklearn import config_context", - "from sklearn.datasets import fetch_openml", - "from sklearn.datasets._openml import _open_openml_url", - "from sklearn.datasets._openml import _arff", - "from sklearn.datasets._openml import _DATA_FILE", - "from sklearn.datasets._openml import _convert_arff_data", - "from sklearn.datasets._openml import _convert_arff_data_dataframe", - "from sklearn.datasets._openml import _get_data_description_by_id", - "from sklearn.datasets._openml import 
_get_local_path", - "from sklearn.datasets._openml import _retry_with_clean_cache", - "from sklearn.datasets._openml import _feature_to_dtype", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils import is_scalar_nan", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_equal", - "from urllib.error import HTTPError", - "from sklearn.datasets.tests.test_common import check_return_X_y", - "from sklearn.externals._arff import ArffContainerType", - "from functools import partial", - "from sklearn.utils._testing import fails_if_pypy" - ], - "classes": [ - { - "name": "_MockHTTPResponse", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "read", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "close", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "info", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__iter__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__enter__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__exit__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "_test_features_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"_fetch_dataset_from_openml", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_monkey_patch_webbased_functions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_to_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_to_dtype_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_iris_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_iris_pandas_equal_to_no_frame", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_iris_multitarget_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_anneal_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_cpu_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_australian_pandas_error_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_as_frame_auto", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_convert_arff_data_dataframe_warning_low_memory_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "test_fetch_openml_adultcensus_pandas_return_X_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_adultcensus_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_miceprotein_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_emotions_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_titanic_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decode_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_iris_multitarget", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_anneal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decode_anneal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_anneal_multitarget", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_cpu", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decode_cpu", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_australian", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_adultcensus", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_miceprotein", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_emotions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decode_emotions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_open_openml_url_cache", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_open_openml_url_unlinks_local_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_retry_with_clean_cache", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_retry_with_clean_cache_http_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_cache", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_notarget", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_inactive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_fetch_nonexiting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises_illegal_multitarget", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warn_ignore_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_string_attribute_without_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dataset_with_openml_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dataset_with_openml_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_illegal_column", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_raises_missing_values_target", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_raises_illegal_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_with_ignored_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fetch_openml_verify_checksum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_convert_arff_data_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - 
{ - "name": "sklearn.datasets.tests.test_rcv1", - "imports": [ - "import scipy.sparse as sp", - "import numpy as np", - "from functools import partial", - "from sklearn.datasets.tests.test_common import check_return_X_y", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_fetch_rcv1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_samples_generator", - "imports": [ - "from collections import defaultdict", - "from functools import partial", - "import numpy as np", - "import pytest", - "import scipy.sparse as sp", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_multilabel_classification", - "from sklearn.datasets import make_hastie_10_2", - "from sklearn.datasets import make_regression", - "from sklearn.datasets import make_blobs", - "from sklearn.datasets import make_friedman1", - "from sklearn.datasets import make_friedman2", - "from sklearn.datasets import make_friedman3", - "from sklearn.datasets import make_low_rank_matrix", - "from sklearn.datasets import make_moons", - "from sklearn.datasets import make_circles", - "from sklearn.datasets import make_sparse_coded_signal", - "from sklearn.datasets import make_sparse_uncorrelated", - "from sklearn.datasets import make_spd_matrix", - "from sklearn.datasets import make_swiss_roll", - "from sklearn.datasets import make_s_curve", - "from sklearn.datasets import make_biclusters", - "from sklearn.datasets import make_checkerboard", - "from sklearn.utils.validation import assert_all_finite", - 
"from numpy.linalg import svd", - "from numpy.linalg import eig" - ], - "classes": [], - "functions": [ - { - "name": "test_make_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_classification_informative_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test the construction of informative features in make_classification\n\nAlso tests `n_clusters_per_class`, `n_classes`, `hypercube` and\nfully-specified `weights`." - }, - { - "name": "test_make_classification_weights_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_classification_weights_array_or_list_ok", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_multilabel_classification_return_sequences", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_multilabel_classification_return_indicator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_multilabel_classification_return_indicator_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_multilabel_classification_valid_arguments", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_hastie_10_2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { 
- "name": "test_make_regression_multitarget", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_blobs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_blobs_n_samples_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_blobs_n_samples_list_with_centers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_blobs_n_samples_centers_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_blobs_return_centers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_blobs_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_friedman1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_friedman2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_friedman3", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_low_rank_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_sparse_coded_signal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_sparse_uncorrelated", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_spd_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_swiss_roll", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_s_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_biclusters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_checkerboard", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_moons", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_moons_unbalanced", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_circles", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_circles_unbalanced", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests.test_svmlight_format", - "imports": [ - "from bz2 import BZ2File", - "import gzip", - "from io import BytesIO", - "import numpy as np", - "import scipy.sparse as sp", - "import os", - "import shutil", - "from tempfile import NamedTemporaryFile", - "import pytest", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import fails_if_pypy", - "import sklearn", - "from sklearn.datasets import 
load_svmlight_file", - "from sklearn.datasets import load_svmlight_files", - "from sklearn.datasets import dump_svmlight_file" - ], - "classes": [], - "functions": [ - { - "name": "test_load_svmlight_file", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_svmlight_file_fd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_svmlight_file_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_svmlight_files", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_svmlight_file_n_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_compressed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_invalid_file", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_invalid_order_file", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_zero_based", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_zero_based_auto", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_with_qid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_large_qid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": "load large libsvm / svmlight file with qid attribute. Tests 64-bit query ID" - }, - { - "name": "test_load_invalid_file2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_not_a_filename", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_filename", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dump", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dump_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dump_concise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dump_comment", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dump_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dump_query_id", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_with_long_qid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_zeros", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_with_offsets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_offset_exhaustive_splits", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_load_with_offsets_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.datasets.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.decomposition.setup", - "imports": [ - "import os", - "import numpy", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition._base", - "imports": [ - "import numpy as np", - "from scipy import linalg", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils.validation import check_is_fitted", - "from abc import ABCMeta", - "from abc import abstractmethod" - ], - "classes": [ - { - "name": "_BasePCA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "get_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute data covariance with the generative model.\n\n``cov = components_.T * S**2 * components_ + sigma2 * eye(n_features)``\nwhere S**2 contains the explained variances, and sigma2 contains the\nnoise variances.\n\nReturns\n-------\ncov : array, shape=(n_features, n_features)\n Estimated covariance of data." 
- }, - { - "name": "get_precision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute data precision matrix with the generative model.\n\nEquals the inverse of the covariance but computed with\nthe matrix inversion lemma for efficiency.\n\nReturns\n-------\nprecision : array, shape=(n_features, n_features)\n Estimated precision of data." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Placeholder for fit. Subclasses should implement this method!\n\nFit the model with X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the instance itself." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data, where n_samples is the number of samples and n_features is the number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply dimensionality reduction to X.\n\nX is projected on the first principal components previously extracted\nfrom a training set.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n New data, where n_samples is the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)\n\nExamples\n--------\n\n>>> import numpy as np\n>>> from sklearn.decomposition import IncrementalPCA\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> ipca = IncrementalPCA(n_components=2, batch_size=3)\n>>> ipca.fit(X)\nIncrementalPCA(batch_size=3, n_components=2)\n>>> ipca.transform(X) # doctest: +SKIP" - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data, where n_samples is the number of samples and n_components is the number of components." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform data back to its original space.\n\nIn other words, return an input X_original whose transform would be X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_components)\n New data, where n_samples is the number of samples\n and n_components is the number of components.\n\nReturns\n-------\nX_original array-like, shape (n_samples, n_features)\n\nNotes\n-----\nIf whitening is enabled, inverse_transform will compute the\nexact inverse operation, which includes reversing whitening." - } - ], - "docstring": "Base class for PCA methods.\n\nWarning: This class should not be used directly.\nUse derived classes instead." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.decomposition._dict_learning", - "imports": [ - "import time", - "import sys", - "import itertools", - "from math import ceil", - "import numpy as np", - "from scipy import linalg", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import deprecated", - "from utils import check_array", - "from utils import check_random_state", - "from utils import gen_even_slices", - "from utils import gen_batches", - "from utils.extmath import randomized_svd", - "from utils.extmath import row_norms", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from linear_model import Lasso", - "from linear_model import orthogonal_mp_gram", - "from linear_model import LassoLars", - "from linear_model import Lars" - ], - "classes": [ - { - "name": "_BaseSparseCoding", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private method allowing to accomodate both DictionaryLearning and\nSparseCoder." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data to be transformed, must have the same number of features as the data used to train the model." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Encode the data as a sparse combination of the dictionary atoms.\n\nCoding method is determined by the object parameter\n`transform_algorithm`.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Test data to be transformed, must have the same number of\n features as the data used to train the model.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Transformed data." - } - ], - "docstring": "Base class from SparseCoder and DictionaryLearning algorithms." - }, - { - "name": "SparseCoder", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "dictionary", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dictionary atoms used for sparse coding. Lines are assumed to be normalized to unit norm." - }, - { - "name": "transform_algorithm", - "type": "Literal['lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold']", - "hasDefault": true, - "default": "'omp'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to transform the data: - `'lars'`: uses the least angle regression method (`linear_model.lars_path`); - `'lasso_lars'`: uses Lars to compute the Lasso solution; - `'lasso_cd'`: uses the coordinate descent method to compute the Lasso solution (linear_model.Lasso). `'lasso_lars'` will be faster if the estimated components are sparse; - `'omp'`: uses orthogonal matching pursuit to estimate the sparse solution; - `'threshold'`: squashes to zero all coefficients less than alpha from the projection ``dictionary * X'``." - }, - { - "name": "transform_n_nonzero_coefs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of nonzero coefficients to target in each column of the solution. 
This is only used by `algorithm='lars'` and `algorithm='omp'` and is overridden by `alpha` in the `omp` case. If `None`, then `transform_n_nonzero_coefs=int(n_features / 10)`." - }, - { - "name": "transform_alpha", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the penalty applied to the L1 norm. If `algorithm='threshold'`, `alpha` is the absolute value of the threshold below which coefficients will be squashed to zero. If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of the reconstruction error targeted. In this case, it overrides `n_nonzero_coefs`. If `None`, default to 1." - }, - { - "name": "split_sign", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to split the sparse feature vector into the concatenation of its negative part and its positive part. This can improve the performance of downstream classifiers." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "positive_code", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the code. .. versionadded:: 0.20" - }, - { - "name": "transform_max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform if `algorithm='lasso_cd'` or `lasso_lars`. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.\n\nParameters\n----------\nX : Ignored\n\ny : Ignored\n\nReturns\n-------\nself : object" - }, - { - "name": "components_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data to be transformed, must have the same number of features as the data used to train the model." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Encode the data as a sparse combination of the dictionary atoms.\n\nCoding method is determined by the object parameter\n`transform_algorithm`.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Test data to be transformed, must have the same number of\n features as the data used to train the model.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Transformed data." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "n_components_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Sparse coding\n\nFinds a sparse representation of data against a fixed, precomputed\ndictionary.\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\n X ~= code * dictionary\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndictionary : ndarray of shape (n_components, n_features)\n The dictionary atoms used for sparse coding. Lines are assumed to be\n normalized to unit norm.\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='omp'\n Algorithm used to transform the data:\n\n - `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n - `'lasso_lars'`: uses Lars to compute the Lasso solution;\n - `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (linear_model.Lasso). `'lasso_lars'` will be faster if\n the estimated components are sparse;\n - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n - `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``.\n\ntransform_n_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. 
If `None`, then\n `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, default to 1.\n\nsplit_sign : bool, default=False\n Whether to split the sparse feature vector into the concatenation of\n its negative part and its positive part. This can improve the\n performance of downstream classifiers.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `lasso_lars`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n The unchanged dictionary atoms.\n\n .. deprecated:: 0.24\n This attribute is deprecated in 0.24 and will be removed in\n 1.1 (renaming of 0.26). Use `dictionary` instead.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.decomposition import SparseCoder\n>>> X = np.array([[-1, -1, -1], [0, 0, 3]])\n>>> dictionary = np.array(\n... [[0, 1, 0],\n... [-1, -1, 2],\n... [1, 1, 1],\n... [0, 1, 1],\n... [0, 2, 1]],\n... dtype=np.float64\n... )\n>>> coder = SparseCoder(\n... dictionary=dictionary, transform_algorithm='lasso_lars',\n... transform_alpha=1e-10,\n... 
)\n>>> coder.transform(X)\narray([[ 0., 0., -1., 0., 0.],\n [ 0., 1., 1., 0., 0.]])\n\nSee Also\n--------\nDictionaryLearning\nMiniBatchDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA\nsparse_encode" - }, - { - "name": "DictionaryLearning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "n_features", - "limitation": null, - "ignored": false, - "docstring": "Number of dictionary elements to extract." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Sparsity controlling parameter." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-8", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for numerical error." - }, - { - "name": "fit_algorithm", - "type": "Literal['lars', 'cd']", - "hasDefault": true, - "default": "'lars'", - "limitation": null, - "ignored": false, - "docstring": "* `'lars'`: uses the least angle regression method to solve the lasso problem (:func:`~sklearn.linear_model.lars_path`); * `'cd'`: uses the coordinate descent method to compute the Lasso solution (:class:`~sklearn.linear_model.Lasso`). Lars will be faster if the estimated components are sparse. .. versionadded:: 0.17 *cd* coordinate descent method to improve speed." 
- }, - { - "name": "transform_algorithm", - "type": "Literal['lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold']", - "hasDefault": true, - "default": "'omp'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to transform the data: - `'lars'`: uses the least angle regression method (:func:`~sklearn.linear_model.lars_path`); - `'lasso_lars'`: uses Lars to compute the Lasso solution. - `'lasso_cd'`: uses the coordinate descent method to compute the Lasso solution (:class:`~sklearn.linear_model.Lasso`). `'lasso_lars'` will be faster if the estimated components are sparse. - `'omp'`: uses orthogonal matching pursuit to estimate the sparse solution. - `'threshold'`: squashes to zero all coefficients less than alpha from the projection ``dictionary * X'``. .. versionadded:: 0.17 *lasso_cd* coordinate descent method to improve speed." - }, - { - "name": "transform_n_nonzero_coefs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of nonzero coefficients to target in each column of the solution. This is only used by `algorithm='lars'` and `algorithm='omp'` and is overridden by `alpha` in the `omp` case. If `None`, then `transform_n_nonzero_coefs=int(n_features / 10)`." - }, - { - "name": "transform_alpha", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the penalty applied to the L1 norm. If `algorithm='threshold'`, `alpha` is the absolute value of the threshold below which coefficients will be squashed to zero. If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of the reconstruction error targeted. In this case, it overrides `n_nonzero_coefs`. 
If `None`, default to 1.0" - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "code_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial value for the code, for warm restart." - }, - { - "name": "dict_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial values for the dictionary, for warm restart." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "To control the verbosity of the procedure." - }, - { - "name": "split_sign", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to split the sparse feature vector into the concatenation of its negative part and its positive part. This can improve the performance of downstream classifiers." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for initializing the dictionary when ``dict_init`` is not specified, randomly shuffling the data when ``shuffle`` is set to ``True``, and updating the dictionary. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "positive_code", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the code. .. 
versionadded:: 0.20" - }, - { - "name": "positive_dict", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the dictionary .. versionadded:: 0.20" - }, - { - "name": "transform_max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform if `algorithm='lasso_cd'` or `'lasso_lars'`. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where `n_samples` in the number of samples and `n_features` is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where `n_samples` in the number of samples\n and `n_features` is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the object itself." 
- } - ], - "docstring": "Dictionary learning\n\nFinds a dictionary (a set of atoms) that can best be used to represent data\nusing a sparse code.\n\nSolves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=n_features\n Number of dictionary elements to extract.\n\nalpha : float, default=1.0\n Sparsity controlling parameter.\n\nmax_iter : int, default=1000\n Maximum number of iterations to perform.\n\ntol : float, default=1e-8\n Tolerance for numerical error.\n\nfit_algorithm : {'lars', 'cd'}, default='lars'\n * `'lars'`: uses the least angle regression method to solve the lasso\n problem (:func:`~sklearn.linear_model.lars_path`);\n * `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (:class:`~sklearn.linear_model.Lasso`). Lars will be\n faster if the estimated components are sparse.\n\n .. versionadded:: 0.17\n *cd* coordinate descent method to improve speed.\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='omp'\n Algorithm used to transform the data:\n\n - `'lars'`: uses the least angle regression method\n (:func:`~sklearn.linear_model.lars_path`);\n - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n - `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (:class:`~sklearn.linear_model.Lasso`). `'lasso_lars'`\n will be faster if the estimated components are sparse.\n - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution.\n - `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``.\n\n .. versionadded:: 0.17\n *lasso_cd* coordinate descent method to improve speed.\n\ntransform_n_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. 
This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. If `None`, then\n `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, default to 1.0\n\nn_jobs : int or None, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\ncode_init : ndarray of shape (n_samples, n_components), default=None\n Initial value for the code, for warm restart.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n Initial values for the dictionary, for warm restart.\n\nverbose : bool, default=False\n To control the verbosity of the procedure.\n\nsplit_sign : bool, default=False\n Whether to split the sparse feature vector into the concatenation of\n its negative part and its positive part. This can improve the\n performance of downstream classifiers.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for initializing the dictionary when ``dict_init`` is not\n specified, randomly shuffling the data when ``shuffle`` is set to\n ``True``, and updating the dictionary. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\npositive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary\n\n .. 
versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n dictionary atoms extracted from the data\n\nerror_ : array\n vector of errors at each iteration\n\nn_iter_ : int\n Number of iterations run.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_sparse_coded_signal\n>>> from sklearn.decomposition import DictionaryLearning\n>>> X, dictionary, code = make_sparse_coded_signal(\n... n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n... random_state=42,\n... )\n>>> dict_learner = DictionaryLearning(\n... n_components=15, transform_algorithm='lasso_lars', random_state=42,\n... )\n>>> X_transformed = dict_learner.fit_transform(X)\n\nWe can check the level of sparsity of `X_transformed`:\n\n>>> np.mean(X_transformed == 0)\n0.88...\n\nWe can compare the average squared euclidean norm of the reconstruction\nerror of the sparse coded signal relative to the squared euclidean norm of\nthe original signal:\n\n>>> X_hat = X_transformed @ dict_learner.components_\n>>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n0.07...\n\nNotes\n-----\n**References:**\n\nJ. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009: Online dictionary learning\nfor sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\nSee Also\n--------\nSparseCoder\nMiniBatchDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA" - }, - { - "name": "MiniBatchDictionaryLearning", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of dictionary elements to extract." 
- }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Sparsity controlling parameter." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Total number of iterations to perform." - }, - { - "name": "fit_algorithm", - "type": "Literal['lars', 'cd']", - "hasDefault": true, - "default": "'lars'", - "limitation": null, - "ignored": false, - "docstring": "The algorithm used: - `'lars'`: uses the least angle regression method to solve the lasso problem (`linear_model.lars_path`) - `'cd'`: uses the coordinate descent method to compute the Lasso solution (`linear_model.Lasso`). Lars will be faster if the estimated components are sparse." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of samples in each mini-batch." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle the samples before forming batches." 
- }, - { - "name": "dict_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "initial value of the dictionary for warm restart scenarios" - }, - { - "name": "transform_algorithm", - "type": "Literal['lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold']", - "hasDefault": true, - "default": "'omp'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to transform the data: - `'lars'`: uses the least angle regression method (`linear_model.lars_path`); - `'lasso_lars'`: uses Lars to compute the Lasso solution. - `'lasso_cd'`: uses the coordinate descent method to compute the Lasso solution (`linear_model.Lasso`). `'lasso_lars'` will be faster if the estimated components are sparse. - `'omp'`: uses orthogonal matching pursuit to estimate the sparse solution. - `'threshold'`: squashes to zero all coefficients less than alpha from the projection ``dictionary * X'``." - }, - { - "name": "transform_n_nonzero_coefs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of nonzero coefficients to target in each column of the solution. This is only used by `algorithm='lars'` and `algorithm='omp'` and is overridden by `alpha` in the `omp` case. If `None`, then `transform_n_nonzero_coefs=int(n_features / 10)`." - }, - { - "name": "transform_alpha", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the penalty applied to the L1 norm. If `algorithm='threshold'`, `alpha` is the absolute value of the threshold below which coefficients will be squashed to zero. If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of the reconstruction error targeted. In this case, it overrides `n_nonzero_coefs`. If `None`, default to 1." 
- }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "To control the verbosity of the procedure." - }, - { - "name": "split_sign", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to split the sparse feature vector into the concatenation of its negative part and its positive part. This can improve the performance of downstream classifiers." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for initializing the dictionary when ``dict_init`` is not specified, randomly shuffling the data when ``shuffle`` is set to ``True``, and updating the dictionary. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "positive_code", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the code. .. versionadded:: 0.20" - }, - { - "name": "positive_dict", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the dictionary. .. versionadded:: 0.20" - }, - { - "name": "transform_max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform if `algorithm='lasso_cd'` or `'lasso_lars'`. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - }, - { - "name": "iter_offset", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of iteration on data batches that has been performed before this call to partial_fit. This is optional: if no number is passed, the memory of the object is used." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Updates the model using the data in X as a mini-batch.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\niter_offset : int, default=None\n The number of iteration on data batches that has been\n performed before this call to partial_fit. This is optional:\n if no number is passed, the memory of the object is\n used.\n\nReturns\n-------\nself : object\n Returns the instance itself." - } - ], - "docstring": "Mini-batch dictionary learning\n\nFinds a dictionary (a set of atoms) that can best be used to represent data\nusing a sparse code.\n\nSolves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of dictionary elements to extract.\n\nalpha : float, default=1\n Sparsity controlling parameter.\n\nn_iter : int, default=1000\n Total number of iterations to perform.\n\nfit_algorithm : {'lars', 'cd'}, default='lars'\n The algorithm used:\n\n - `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`)\n - `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nbatch_size : int, default=3\n Number of samples in each mini-batch.\n\nshuffle : bool, default=True\n Whether to shuffle the samples before forming batches.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n initial value of the dictionary for warm restart scenarios\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='omp'\n Algorithm used to transform the data:\n\n - `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n - `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). `'lasso_lars'` will be faster\n if the estimated components are sparse.\n - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution.\n - `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``.\n\ntransform_n_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. If `None`, then\n `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, default to 1.\n\nverbose : bool, default=False\n To control the verbosity of the procedure.\n\nsplit_sign : bool, default=False\n Whether to split the sparse feature vector into the concatenation of\n its negative part and its positive part. 
This can improve the\n performance of downstream classifiers.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for initializing the dictionary when ``dict_init`` is not\n specified, randomly shuffling the data when ``shuffle`` is set to\n ``True``, and updating the dictionary. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\npositive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Components extracted from the data.\n\ninner_stats_ : tuple of (A, B) ndarrays\n Internal sufficient statistics that are kept by the algorithm.\n Keeping them is useful in online settings, to avoid losing the\n history of the evolution, but they shouldn't have any use for the\n end user.\n `A` `(n_components, n_components)` is the dictionary covariance matrix.\n `B` `(n_features, n_components)` is the data approximation matrix.\n\nn_iter_ : int\n Number of iterations run.\n\niter_offset_ : int\n The number of iteration on data batches that has been\n performed before.\n\nrandom_state_ : RandomState instance\n RandomState instance that is generated either from a seed, the random\n number generattor or by `np.random`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_sparse_coded_signal\n>>> from sklearn.decomposition import MiniBatchDictionaryLearning\n>>> X, dictionary, code = make_sparse_coded_signal(\n... n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n... random_state=42)\n>>> dict_learner = MiniBatchDictionaryLearning(\n... 
n_components=15, transform_algorithm='lasso_lars', random_state=42,\n... )\n>>> X_transformed = dict_learner.fit_transform(X)\n\nWe can check the level of sparsity of `X_transformed`:\n\n>>> np.mean(X_transformed == 0)\n0.87...\n\nWe can compare the average squared euclidean norm of the reconstruction\nerror of the sparse coded signal relative to the squared euclidean norm of\nthe original signal:\n\n>>> X_hat = X_transformed @ dict_learner.components_\n>>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n0.10...\n\nNotes\n-----\n**References:**\n\nJ. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009: Online dictionary learning\nfor sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\nSee Also\n--------\nSparseCoder\nDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA" - } - ], - "functions": [ - { - "name": "_check_positive_coding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sparse_encode", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix." - }, - { - "name": "dictionary", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dictionary matrix against which to solve the sparse coding of the data. Some of the algorithms assume normalized rows." - }, - { - "name": "gram", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed Gram matrix, `dictionary * dictionary'` gram can be `None` if method is 'threshold'." - }, - { - "name": "cov", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed covariance, `dictionary * X'`." 
- }, - { - "name": "algorithm", - "type": "Literal['lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The algorithm used: * `'lars'`: uses the least angle regression method (`linear_model.lars_path`); * `'lasso_lars'`: uses Lars to compute the Lasso solution; * `'lasso_cd'`: uses the coordinate descent method to compute the Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if the estimated components are sparse; * `'omp'`: uses orthogonal matching pursuit to estimate the sparse solution; * `'threshold'`: squashes to zero all coefficients less than regularization from the projection `dictionary * data'`." - }, - { - "name": "regularization", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The regularization parameter. It corresponds to alpha when algorithm is `'lasso_lars'`, `'lasso_cd'` or `'threshold'`. Otherwise it corresponds to `n_nonzero_coefs`." - }, - { - "name": "init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initialization value of the sparse code. Only used if `algorithm='lasso_cd'`." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform if `algorithm='lasso_cd'` or `'lasso_lars'`." - }, - { - "name": "copy_cov", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy the precomputed covariance matrix; if `False`, it may be overwritten." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If `False`, the input arrays `X` and dictionary will not be checked." 
- }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity; the higher, the more messages." - }, - { - "name": "positive: bool", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce a positivity constraint on the sparse code. .. versionadded:: 0.20" - }, - { - "name": "default=False", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce a positivity constraint on the sparse code. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generic sparse coding.\n\nEach column of the result is the solution to a Lasso problem.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data matrix.\n\ndictionary : ndarray of shape (n_components, n_features)\n The dictionary matrix against which to solve the sparse coding of\n the data. Some of the algorithms assume normalized rows.\n\ngram : ndarray of shape (n_components, n_components) or None\n Precomputed Gram matrix, `dictionary * dictionary'`\n gram can be `None` if method is 'threshold'.\n\ncov : ndarray of shape (n_components, n_samples), default=None\n Precomputed covariance, `dictionary * X'`.\n\nalgorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='lasso_lars'\n The algorithm used:\n\n * `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n * `'lasso_lars'`: uses Lars to compute the Lasso solution;\n * `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). 
lasso_lars will be faster if\n the estimated components are sparse;\n * `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n * `'threshold'`: squashes to zero all coefficients less than\n regularization from the projection `dictionary * data'`.\n\nregularization : int or float, default=None\n The regularization parameter. It corresponds to alpha when\n algorithm is `'lasso_lars'`, `'lasso_cd'` or `'threshold'`.\n Otherwise it corresponds to `n_nonzero_coefs`.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n Initialization value of the sparse code. Only used if\n `algorithm='lasso_cd'`.\n\nmax_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\ncopy_cov : bool, default=True\n Whether to copy the precomputed covariance matrix; if `False`, it may\n be overwritten.\n\ncheck_input : bool, default=True\n If `False`, the input arrays `X` and dictionary will not be checked.\n\nverbose : int, default=0\n Controls the verbosity; the higher, the more messages.\n\npositive: bool, default=False\n Whether to enforce a positivity constraint on the sparse code.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ncode : ndarray of shape (n_components, n_features)\n The sparse codes.\n\nSee Also\n--------\nsklearn.linear_model.lars_path\nsklearn.linear_model.orthogonal_mp\nsklearn.linear_model.Lasso\nSparseCoder" - }, - { - "name": "sparse_encode", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix." - }, - { - "name": "dictionary", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dictionary matrix against which to solve the sparse coding of the data. Some of the algorithms assume normalized rows for meaningful output." 
- }, - { - "name": "gram", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed Gram matrix, `dictionary * dictionary'`." - }, - { - "name": "cov", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed covariance, `dictionary' * X`." - }, - { - "name": "algorithm", - "type": "Literal['lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The algorithm used: * `'lars'`: uses the least angle regression method (`linear_model.lars_path`); * `'lasso_lars'`: uses Lars to compute the Lasso solution; * `'lasso_cd'`: uses the coordinate descent method to compute the Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if the estimated components are sparse; * `'omp'`: uses orthogonal matching pursuit to estimate the sparse solution; * `'threshold'`: squashes to zero all coefficients less than regularization from the projection `dictionary * data'`." - }, - { - "name": "n_nonzero_coefs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of nonzero coefficients to target in each column of the solution. This is only used by `algorithm='lars'` and `algorithm='omp'` and is overridden by `alpha` in the `omp` case. If `None`, then `n_nonzero_coefs=int(n_features / 10)`." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the penalty applied to the L1 norm. If `algorithm='threshold'`, `alpha` is the absolute value of the threshold below which coefficients will be squashed to zero. 
If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of the reconstruction error targeted. In this case, it overrides `n_nonzero_coefs`. If `None`, default to 1." - }, - { - "name": "copy_cov", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy the precomputed covariance matrix; if `False`, it may be overwritten." - }, - { - "name": "init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initialization value of the sparse codes. Only used if `algorithm='lasso_cd'`." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform if `algorithm='lasso_cd'` or `'lasso_lars'`." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If `False`, the input arrays X and dictionary will not be checked." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity; the higher, the more messages." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the encoding. .. 
versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sparse coding\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\n X ~= code * dictionary\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data matrix.\n\ndictionary : ndarray of shape (n_components, n_features)\n The dictionary matrix against which to solve the sparse coding of\n the data. Some of the algorithms assume normalized rows for meaningful\n output.\n\ngram : ndarray of shape (n_components, n_components), default=None\n Precomputed Gram matrix, `dictionary * dictionary'`.\n\ncov : ndarray of shape (n_components, n_samples), default=None\n Precomputed covariance, `dictionary' * X`.\n\nalgorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='lasso_lars'\n The algorithm used:\n\n * `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n * `'lasso_lars'`: uses Lars to compute the Lasso solution;\n * `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if\n the estimated components are sparse;\n * `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n * `'threshold'`: squashes to zero all coefficients less than\n regularization from the projection `dictionary * data'`.\n\nn_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. 
If `None`, then\n `n_nonzero_coefs=int(n_features / 10)`.\n\nalpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, default to 1.\n\ncopy_cov : bool, default=True\n Whether to copy the precomputed covariance matrix; if `False`, it may\n be overwritten.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n Initialization value of the sparse codes. Only used if\n `algorithm='lasso_cd'`.\n\nmax_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\ncheck_input : bool, default=True\n If `False`, the input arrays X and dictionary will not be checked.\n\nverbose : int, default=0\n Controls the verbosity; the higher, the more messages.\n\npositive : bool, default=False\n Whether to enforce positivity when finding the encoding.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ncode : ndarray of shape (n_samples, n_components)\n The sparse codes\n\nSee Also\n--------\nsklearn.linear_model.lars_path\nsklearn.linear_model.orthogonal_mp\nsklearn.linear_model.Lasso\nSparseCoder" - }, - { - "name": "_update_dict", - "decorators": [], - "parameters": [ - { - "name": "dictionary", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Value of the dictionary at the previous iteration." 
- }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix." - }, - { - "name": "code", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sparse coding of the data against which to optimize the dictionary." - }, - { - "name": "verbose: bool", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Degree of output the procedure will print." - }, - { - "name": "default=False", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Degree of output the procedure will print." - }, - { - "name": "return_r2", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to compute and return the residual sum of squares corresponding to the computed solution." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for randomly initializing the dictionary. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the dictionary. .. 
versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update the dense dictionary factor in place.\n\nParameters\n----------\ndictionary : ndarray of shape (n_features, n_components)\n Value of the dictionary at the previous iteration.\n\nY : ndarray of shape (n_features, n_samples)\n Data matrix.\n\ncode : ndarray of shape (n_components, n_samples)\n Sparse coding of the data against which to optimize the dictionary.\n\nverbose: bool, default=False\n Degree of output the procedure will print.\n\nreturn_r2 : bool, default=False\n Whether to compute and return the residual sum of squares corresponding\n to the computed solution.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for randomly initializing the dictionary. Pass an int for\n reproducible results across multiple function calls.\n See :term:`Glossary `.\n\npositive : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ndictionary : ndarray of shape (n_features, n_components)\n Updated dictionary." - }, - { - "name": "dict_learning", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of dictionary atoms to extract." - }, - { - "name": "alpha", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sparsity controlling parameter." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform." 
- }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-8", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for the stopping condition." - }, - { - "name": "method", - "type": "Literal['lars', 'cd']", - "hasDefault": true, - "default": "'lars'", - "limitation": null, - "ignored": false, - "docstring": "The method used: * `'lars'`: uses the least angle regression method to solve the lasso problem (`linear_model.lars_path`); * `'cd'`: uses the coordinate descent method to compute the Lasso solution (`linear_model.Lasso`). Lars will be faster if the estimated components are sparse." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "dict_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial value for the dictionary for warm restart scenarios." - }, - { - "name": "code_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial value for the sparse code for warm restart scenarios." - }, - { - "name": "callback", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Callable that gets invoked every five iterations" - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "To control the verbosity of the procedure." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for randomly initializing the dictionary. 
Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - }, - { - "name": "positive_dict", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the dictionary. .. versionadded:: 0.20" - }, - { - "name": "positive_code", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the code. .. versionadded:: 0.20" - }, - { - "name": "method_max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Solves a dictionary learning matrix factorization problem.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n (U^*, V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data matrix.\n\nn_components : int\n Number of dictionary atoms to extract.\n\nalpha : int\n Sparsity controlling parameter.\n\nmax_iter : int, default=100\n Maximum number of iterations to perform.\n\ntol : float, default=1e-8\n Tolerance for the stopping condition.\n\nmethod : {'lars', 'cd'}, default='lars'\n The method used:\n\n * `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`);\n * `'cd'`: uses the 
coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n Initial value for the dictionary for warm restart scenarios.\n\ncode_init : ndarray of shape (n_samples, n_components), default=None\n Initial value for the sparse code for warm restart scenarios.\n\ncallback : callable, default=None\n Callable that gets invoked every five iterations\n\nverbose : bool, default=False\n To control the verbosity of the procedure.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for randomly initializing the dictionary. Pass an int for\n reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\npositive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\nmethod_max_iter : int, default=1000\n Maximum number of iterations to perform.\n\n .. versionadded:: 0.22\n\nReturns\n-------\ncode : ndarray of shape (n_samples, n_components)\n The sparse code factor in the matrix factorization.\n\ndictionary : ndarray of shape (n_components, n_features),\n The dictionary factor in the matrix factorization.\n\nerrors : array\n Vector of errors at each iteration.\n\nn_iter : int\n Number of iterations run. 
Returned only if `return_n_iter` is\n set to True.\n\nSee Also\n--------\ndict_learning_online\nDictionaryLearning\nMiniBatchDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA" - }, - { - "name": "dict_learning_online", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of dictionary atoms to extract." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Sparsity controlling parameter." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of mini-batch iterations to perform." - }, - { - "name": "return_code", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to also return the code U or just the dictionary `V`." - }, - { - "name": "dict_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial value for the dictionary for warm restart scenarios." - }, - { - "name": "callback", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "callable that gets invoked every five iterations." - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The number of samples to take in each batch." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "To control the verbosity of the procedure." 
- }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle the data before splitting it in batches." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "method", - "type": "Literal['lars', 'cd']", - "hasDefault": true, - "default": "'lars'", - "limitation": null, - "ignored": false, - "docstring": "* `'lars'`: uses the least angle regression method to solve the lasso problem (`linear_model.lars_path`); * `'cd'`: uses the coordinate descent method to compute the Lasso solution (`linear_model.Lasso`). Lars will be faster if the estimated components are sparse." - }, - { - "name": "iter_offset", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Number of previous iterations completed on the dictionary used for initialization." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for initializing the dictionary when ``dict_init`` is not specified, randomly shuffling the data when ``shuffle`` is set to ``True``, and updating the dictionary. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "return_inner_stats", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Return the inner statistics A (dictionary covariance) and B (data approximation). Useful to restart the algorithm in an online setting. If `return_inner_stats` is `True`, `return_code` is ignored." 
- }, - { - "name": "inner_stats", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Inner sufficient statistics that are kept by the algorithm. Passing them at initialization is useful in online settings, to avoid losing the history of the evolution. `A` `(n_components, n_components)` is the dictionary covariance matrix. `B` `(n_features, n_components)` is the data approximation matrix." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - }, - { - "name": "positive_dict", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the dictionary. .. versionadded:: 0.20" - }, - { - "name": "positive_code", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enforce positivity when finding the code. .. versionadded:: 0.20" - }, - { - "name": "method_max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform when solving the lasso problem. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Solves a dictionary learning matrix factorization problem online.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n (U^*, V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. 
This is\naccomplished by repeatedly iterating over mini-batches by slicing\nthe input data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data matrix.\n\nn_components : int, default=2\n Number of dictionary atoms to extract.\n\nalpha : float, default=1\n Sparsity controlling parameter.\n\nn_iter : int, default=100\n Number of mini-batch iterations to perform.\n\nreturn_code : bool, default=True\n Whether to also return the code U or just the dictionary `V`.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n Initial value for the dictionary for warm restart scenarios.\n\ncallback : callable, default=None\n callable that gets invoked every five iterations.\n\nbatch_size : int, default=3\n The number of samples to take in each batch.\n\nverbose : bool, default=False\n To control the verbosity of the procedure.\n\nshuffle : bool, default=True\n Whether to shuffle the data before splitting it in batches.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nmethod : {'lars', 'cd'}, default='lars'\n * `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`);\n * `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse.\n\niter_offset : int, default=0\n Number of previous iterations completed on the dictionary used for\n initialization.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for initializing the dictionary when ``dict_init`` is not\n specified, randomly shuffling the data when ``shuffle`` is set to\n ``True``, and updating the dictionary. 
Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\nreturn_inner_stats : bool, default=False\n Return the inner statistics A (dictionary covariance) and B\n (data approximation). Useful to restart the algorithm in an\n online setting. If `return_inner_stats` is `True`, `return_code` is\n ignored.\n\ninner_stats : tuple of (A, B) ndarrays, default=None\n Inner sufficient statistics that are kept by the algorithm.\n Passing them at initialization is useful in online settings, to\n avoid losing the history of the evolution.\n `A` `(n_components, n_components)` is the dictionary covariance matrix.\n `B` `(n_features, n_components)` is the data approximation matrix.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\npositive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\nmethod_max_iter : int, default=1000\n Maximum number of iterations to perform when solving the lasso problem.\n\n .. versionadded:: 0.22\n\nReturns\n-------\ncode : ndarray of shape (n_samples, n_components),\n The sparse code (only returned if `return_code=True`).\n\ndictionary : ndarray of shape (n_components, n_features),\n The solutions to the dictionary learning problem.\n\nn_iter : int\n Number of iterations run. 
Returned only if `return_n_iter` is\n set to `True`.\n\nSee Also\n--------\ndict_learning\nDictionaryLearning\nMiniBatchDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA" - } - ] - }, - { - "name": "sklearn.decomposition._factor_analysis", - "imports": [ - "import warnings", - "from math import sqrt", - "from math import log", - "import numpy as np", - "from scipy import linalg", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_random_state", - "from utils.extmath import fast_logdet", - "from utils.extmath import randomized_svd", - "from utils.extmath import squared_norm", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from exceptions import ConvergenceWarning" - ], - "classes": [ - { - "name": "FactorAnalysis", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimensionality of latent space, the number of components of ``X`` that are obtained after ``transform``. If None, n_components is set to the number of features." - }, - { - "name": "tol", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Stopping tolerance for log-likelihood increase." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to make a copy of X. If ``False``, the input X gets overwritten during fitting." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations." 
- }, - { - "name": "noise_variance_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial guess of the noise variance for each feature. If None, it defaults to np.ones(n_features)." - }, - { - "name": "svd_method", - "type": "Literal['lapack', 'randomized']", - "hasDefault": true, - "default": "'randomized'", - "limitation": null, - "ignored": false, - "docstring": "Which SVD method to use. If 'lapack' use standard SVD from scipy.linalg, if 'randomized' use fast ``randomized_svd`` function. Defaults to 'randomized'. For most applications 'randomized' will be sufficiently precise while providing significant speed gains. Accuracy can also be improved by setting higher values for `iterated_power`. If this is not sufficient, for maximum precision you should choose 'lapack'." - }, - { - "name": "iterated_power", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations for the power method. 3 by default. Only used if ``svd_method`` equals 'randomized'." - }, - { - "name": "rotation", - "type": "Literal['varimax', 'quartimax']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, apply the indicated rotation. Currently, varimax and quartimax are implemented. See `\"The varimax criterion for analytic rotation in factor analysis\" `_ H. F. Kaiser, 1958. .. versionadded:: 0.24" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Only used when ``svd_method`` equals 'randomized'. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the FactorAnalysis model to X using SVD based approach\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : Ignored\n\nReturns\n-------\nself" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply dimensionality reduction to X using the model.\n\nCompute the expected mean of the latent variables.\nSee Barber, 21.2.33 (or Bishop, 12.66).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n The latent variables of X." - }, - { - "name": "get_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute data covariance with the FactorAnalysis model.\n\n``cov = components_.T * components_ + diag(noise_variance)``\n\nReturns\n-------\ncov : ndarray of shape (n_features, n_features)\n Estimated covariance of data." 
- }, - { - "name": "get_precision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute data precision matrix with the FactorAnalysis model.\n\nReturns\n-------\nprecision : ndarray of shape (n_features, n_features)\n Estimated precision of data." - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the log-likelihood of each sample\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The data\n\nReturns\n-------\nll : ndarray of shape (n_samples,)\n Log-likelihood of each sample under the current model" - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the average log-likelihood of the samples\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The data\n\ny : Ignored\n\nReturns\n-------\nll : float\n Average log-likelihood of the samples under the current model" - }, - { - "name": "_rotate", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Rotate the factor analysis solution." 
- } - ], - "docstring": "Factor Analysis (FA).\n\nA simple linear generative model with Gaussian latent variables.\n\nThe observations are assumed to be caused by a linear transformation of\nlower dimensional latent factors and added Gaussian noise.\nWithout loss of generality the factors are distributed according to a\nGaussian with zero mean and unit covariance. The noise is also zero mean\nand has an arbitrary diagonal covariance matrix.\n\nIf we would restrict the model further, by assuming that the Gaussian\nnoise is even isotropic (all diagonal entries are the same) we would obtain\n:class:`PPCA`.\n\nFactorAnalysis performs a maximum likelihood estimate of the so-called\n`loading` matrix, the transformation of the latent variables to the\nobserved ones, using SVD based approach.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nn_components : int, default=None\n Dimensionality of latent space, the number of components\n of ``X`` that are obtained after ``transform``.\n If None, n_components is set to the number of features.\n\ntol : float, defaul=1e-2\n Stopping tolerance for log-likelihood increase.\n\ncopy : bool, default=True\n Whether to make a copy of X. If ``False``, the input X gets overwritten\n during fitting.\n\nmax_iter : int, default=1000\n Maximum number of iterations.\n\nnoise_variance_init : ndarray of shape (n_features,), default=None\n The initial guess of the noise variance for each feature.\n If None, it defaults to np.ones(n_features).\n\nsvd_method : {'lapack', 'randomized'}, default='randomized'\n Which SVD method to use. If 'lapack' use standard SVD from\n scipy.linalg, if 'randomized' use fast ``randomized_svd`` function.\n Defaults to 'randomized'. For most applications 'randomized' will\n be sufficiently precise while providing significant speed gains.\n Accuracy can also be improved by setting higher values for\n `iterated_power`. 
If this is not sufficient, for maximum precision\n you should choose 'lapack'.\n\niterated_power : int, default=3\n Number of iterations for the power method. 3 by default. Only used\n if ``svd_method`` equals 'randomized'.\n\nrotation : {'varimax', 'quartimax'}, default=None\n If not None, apply the indicated rotation. Currently, varimax and\n quartimax are implemented. See\n `\"The varimax criterion for analytic rotation in factor analysis\"\n `_\n H. F. Kaiser, 1958.\n\n .. versionadded:: 0.24\n\nrandom_state : int or RandomState instance, default=0\n Only used when ``svd_method`` equals 'randomized'. Pass an int for\n reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Components with maximum variance.\n\nloglike_ : list of shape (n_iterations,)\n The log likelihood at each iteration.\n\nnoise_variance_ : ndarray of shape (n_features,)\n The estimated noise variance for each feature.\n\nn_iter_ : int\n Number of iterations run.\n\nmean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, estimated from the training set.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import FactorAnalysis\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = FactorAnalysis(n_components=7, random_state=0)\n>>> X_transformed = transformer.fit_transform(X)\n>>> X_transformed.shape\n(1797, 7)\n\nReferences\n----------\n- David Barber, Bayesian Reasoning and Machine Learning,\n Algorithm 21.1.\n\n- Christopher M. 
Bishop: Pattern Recognition and Machine Learning,\n Chapter 12.2.4.\n\nSee Also\n--------\nPCA: Principal component analysis is also a latent linear variable model\n which however assumes equal noise variance for each feature.\n This extra assumption makes probabilistic PCA faster as it can be\n computed in closed form.\nFastICA: Independent component analysis, a latent variable model with\n non-Gaussian latent variables." - } - ], - "functions": [ - { - "name": "_ortho_rotation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return rotated components." - } - ] - }, - { - "name": "sklearn.decomposition._fastica", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy import linalg", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from exceptions import ConvergenceWarning", - "from utils import check_array", - "from utils import as_float_array", - "from utils import check_random_state", - "from utils.validation import check_is_fitted", - "from utils.validation import FLOAT_DTYPES", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "FastICA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components to use. If None is passed, all are used." - }, - { - "name": "algorithm", - "type": "Literal['parallel', 'deflation']", - "hasDefault": true, - "default": "'parallel'", - "limitation": null, - "ignored": false, - "docstring": "Apply parallel or deflational algorithm for FastICA." 
- }, - { - "name": "whiten", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If whiten is false, the data is already considered to be whitened, and no whitening is performed." - }, - { - "name": "fun", - "type": "Literal['logcosh', 'exp', 'cube']", - "hasDefault": true, - "default": "'logcosh'", - "limitation": null, - "ignored": false, - "docstring": "The functional form of the G function used in the approximation to neg-entropy. Could be either 'logcosh', 'exp', or 'cube'. You can also provide your own function. It should return a tuple containing the value of the function, and of its derivative, in the point. Example:: def my_g(x): return x ** 3, (3 * x ** 2).mean(axis=-1)" - }, - { - "name": "fun_args", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Arguments to send to the functional form. If empty and if fun='logcosh', fun_args will take value {'alpha' : 1.0}." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations during fit." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance on update at each iteration." - }, - { - "name": "w_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The mixing matrix to be used to initialize the algorithm." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to initialize ``w_init`` when not specified, with a normal distribution. Pass an int, for reproducible results across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "compute_sources", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If False, sources are not computes but only the rotation matrix. This can save memory when working with big data. Defaults to False." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ncompute_sources : bool, default=False\n If False, sources are not computes but only the rotation matrix.\n This can save memory when working with big data. Defaults to False.\n\nReturns\n-------\n X_new : ndarray of shape (n_samples, n_components)" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model and recover the sources from X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to transform, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, data passed to fit can be overwritten. Defaults to True." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Recover the sources from X (apply the unmixing matrix).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to transform, where n_samples is the number of samples\n and n_features is the number of features.\n\ncopy : bool, default=True\n If False, data passed to fit can be overwritten. Defaults to True.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)" - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sources, where n_samples is the number of samples and n_components is the number of components." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, data passed to fit are overwritten. Defaults to True." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform the sources back to the mixed data (apply mixing matrix).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_components)\n Sources, where n_samples is the number of samples\n and n_components is the number of components.\ncopy : bool, default=True\n If False, data passed to fit are overwritten. Defaults to True.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_features)" - } - ], - "docstring": "FastICA: a fast algorithm for Independent Component Analysis.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of components to use. 
If None is passed, all are used.\n\nalgorithm : {'parallel', 'deflation'}, default='parallel'\n Apply parallel or deflational algorithm for FastICA.\n\nwhiten : bool, default=True\n If whiten is false, the data is already considered to be\n whitened, and no whitening is performed.\n\nfun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'\n The functional form of the G function used in the\n approximation to neg-entropy. Could be either 'logcosh', 'exp',\n or 'cube'.\n You can also provide your own function. It should return a tuple\n containing the value of the function, and of its derivative, in the\n point. Example::\n\n def my_g(x):\n return x ** 3, (3 * x ** 2).mean(axis=-1)\n\nfun_args : dict, default=None\n Arguments to send to the functional form.\n If empty and if fun='logcosh', fun_args will take value\n {'alpha' : 1.0}.\n\nmax_iter : int, default=200\n Maximum number of iterations during fit.\n\ntol : float, default=1e-4\n Tolerance on update at each iteration.\n\nw_init : ndarray of shape (n_components, n_components), default=None\n The mixing matrix to be used to initialize the algorithm.\n\nrandom_state : int, RandomState instance or None, default=None\n Used to initialize ``w_init`` when not specified, with a\n normal distribution. Pass an int, for reproducible results\n across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n The linear operator to apply to the data to get the independent\n sources. This is equal to the unmixing matrix when ``whiten`` is\n False, and equal to ``np.dot(unmixing_matrix, self.whitening_)`` when\n ``whiten`` is True.\n\nmixing_ : ndarray of shape (n_features, n_components)\n The pseudo-inverse of ``components_``. It is the linear operator\n that maps independent sources to the data.\n\nmean_ : ndarray of shape(n_features,)\n The mean over features. 
Only set if `self.whiten` is True.\n\nn_iter_ : int\n If the algorithm is \"deflation\", n_iter is the\n maximum number of iterations run across all components. Else\n they are just the number of iterations taken to converge.\n\nwhitening_ : ndarray of shape (n_components, n_features)\n Only set if whiten is 'True'. This is the pre-whitening matrix\n that projects data onto the first `n_components` principal components.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import FastICA\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = FastICA(n_components=7,\n... random_state=0)\n>>> X_transformed = transformer.fit_transform(X)\n>>> X_transformed.shape\n(1797, 7)\n\nNotes\n-----\nImplementation based on\n*A. Hyvarinen and E. Oja, Independent Component Analysis:\nAlgorithms and Applications, Neural Networks, 13(4-5), 2000,\npp. 411-430*" - } - ], - "functions": [ - { - "name": "_gs_decorrelation", - "decorators": [], - "parameters": [ - { - "name": "w", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array to be orthogonalized" - }, - { - "name": "W", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Null space definition" - }, - { - "name": "j", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The no of (from the first) rows of Null space W wrt which w is orthogonalized." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Orthonormalize w wrt the first j rows of W.\n\nParameters\n----------\nw : ndarray of shape (n,)\n Array to be orthogonalized\n\nW : ndarray of shape (p, n)\n Null space definition\n\nj : int < p\n The no of (from the first) rows of Null space W wrt which w is\n orthogonalized.\n\nNotes\n-----\nAssumes that W is orthogonal\nw changed in place" - }, - { - "name": "_sym_decorrelation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Symmetric decorrelation\ni.e. W <- (W * W.T) ^{-1/2} * W" - }, - { - "name": "_ica_def", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Deflationary FastICA using fun approx to neg-entropy function\n\nUsed internally by FastICA." - }, - { - "name": "_ica_par", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Parallel FastICA.\n\nUsed internally by FastICA --main loop" - }, - { - "name": "_logcosh", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_exp", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_cube", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fastica", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components to extract. 
If None no dimension reduction is performed." - }, - { - "name": "algorithm", - "type": "Literal['parallel', 'deflation']", - "hasDefault": true, - "default": "'parallel'", - "limitation": null, - "ignored": false, - "docstring": "Apply a parallel or deflational FASTICA algorithm." - }, - { - "name": "whiten", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True perform an initial whitening of the data. If False, the data is assumed to have already been preprocessed: it should be centered, normed and white. Otherwise you will get incorrect results. In this case the parameter n_components will be ignored." - }, - { - "name": "fun", - "type": "Literal['logcosh', 'exp', 'cube']", - "hasDefault": true, - "default": "'logcosh'", - "limitation": null, - "ignored": false, - "docstring": "The functional form of the G function used in the approximation to neg-entropy. Could be either 'logcosh', 'exp', or 'cube'. You can also provide your own function. It should return a tuple containing the value of the function, and of its derivative, in the point. The derivative should be averaged along its last dimension. Example: def my_g(x): return x ** 3, np.mean(3 * x ** 2, axis=-1)" - }, - { - "name": "fun_args", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Arguments to send to the functional form. If empty or None and if fun='logcosh', fun_args will take value {'alpha' : 1.0}" - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-04", - "limitation": null, - "ignored": false, - "docstring": "A positive scalar giving the tolerance at which the un-mixing matrix is considered to have converged." 
- }, - { - "name": "w_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial un-mixing array of dimension (n.comp,n.comp). If None (default) then an array of normal r.v.'s is used." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to initialize ``w_init`` when not specified, with a normal distribution. Pass an int, for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "return_X_mean", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, X_mean is returned too." - }, - { - "name": "compute_sources", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, sources are not computed, but only the rotation matrix. This can save memory when working with big data. Defaults to True." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform Fast Independent Component Analysis.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\nn_components : int, default=None\n Number of components to extract. 
If None no dimension reduction\n is performed.\n\nalgorithm : {'parallel', 'deflation'}, default='parallel'\n Apply a parallel or deflational FASTICA algorithm.\n\nwhiten : bool, default=True\n If True perform an initial whitening of the data.\n If False, the data is assumed to have already been\n preprocessed: it should be centered, normed and white.\n Otherwise you will get incorrect results.\n In this case the parameter n_components will be ignored.\n\nfun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'\n The functional form of the G function used in the\n approximation to neg-entropy. Could be either 'logcosh', 'exp',\n or 'cube'.\n You can also provide your own function. It should return a tuple\n containing the value of the function, and of its derivative, in the\n point. The derivative should be averaged along its last dimension.\n Example:\n\n def my_g(x):\n return x ** 3, np.mean(3 * x ** 2, axis=-1)\n\nfun_args : dict, default=None\n Arguments to send to the functional form.\n If empty or None and if fun='logcosh', fun_args will take value\n {'alpha' : 1.0}\n\nmax_iter : int, default=200\n Maximum number of iterations to perform.\n\ntol : float, default=1e-04\n A positive scalar giving the tolerance at which the\n un-mixing matrix is considered to have converged.\n\nw_init : ndarray of shape (n_components, n_components), default=None\n Initial un-mixing array of dimension (n.comp,n.comp).\n If None (default) then an array of normal r.v.'s is used.\n\nrandom_state : int, RandomState instance or None, default=None\n Used to initialize ``w_init`` when not specified, with a\n normal distribution. Pass an int, for reproducible results\n across multiple function calls.\n See :term:`Glossary `.\n\nreturn_X_mean : bool, default=False\n If True, X_mean is returned too.\n\ncompute_sources : bool, default=True\n If False, sources are not computed, but only the rotation matrix.\n This can save memory when working with big data. 
Defaults to True.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\nK : ndarray of shape (n_components, n_features) or None\n If whiten is 'True', K is the pre-whitening matrix that projects data\n onto the first n_components principal components. If whiten is 'False',\n K is 'None'.\n\nW : ndarray of shape (n_components, n_components)\n The square matrix that unmixes the data after whitening.\n The mixing matrix is the pseudo-inverse of matrix ``W K``\n if K is not None, else it is the inverse of W.\n\nS : ndarray of shape (n_samples, n_components) or None\n Estimated source matrix\n\nX_mean : ndarray of shape (n_features,)\n The mean over features. Returned only if return_X_mean is True.\n\nn_iter : int\n If the algorithm is \"deflation\", n_iter is the\n maximum number of iterations run across all components. Else\n they are just the number of iterations taken to converge. This is\n returned only when return_n_iter is set to `True`.\n\nNotes\n-----\n\nThe data matrix X is considered to be a linear combination of\nnon-Gaussian (independent) components i.e. X = AS where columns of S\ncontain the independent components and A is a linear mixing\nmatrix. In short ICA attempts to `un-mix' the data by estimating an\nun-mixing matrix W where ``S = W K X.``\nWhile FastICA was proposed to estimate as many sources\nas features, it is possible to estimate less by setting\nn_components < n_features. It this case K is not a square matrix\nand the estimated A is the pseudo-inverse of ``W K``.\n\nThis implementation was originally made for data of shape\n[n_features, n_samples]. Now the input is transposed\nbefore the algorithm is applied. This makes it slightly\nfaster for Fortran-ordered input.\n\nImplemented using FastICA:\n*A. Hyvarinen and E. Oja, Independent Component Analysis:\nAlgorithms and Applications, Neural Networks, 13(4-5), 2000,\npp. 
411-430*" - } - ] - }, - { - "name": "sklearn.decomposition._incremental_pca", - "imports": [ - "import numpy as np", - "from scipy import linalg", - "from scipy import sparse", - "from _base import _BasePCA", - "from utils import gen_batches", - "from utils.extmath import svd_flip", - "from utils.extmath import _incremental_mean_and_var", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "IncrementalPCA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components to keep. If ``n_components`` is ``None``, then ``n_components`` is set to ``min(n_samples, n_features)``." - }, - { - "name": "whiten", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When True (False by default) the ``components_`` vectors are divided by ``n_samples`` times ``components_`` to ensure uncorrelated outputs with unit component-wise variances. Whitening will remove some information from the transformed signal (the relative variance scales of the components) but can sometimes improve the predictive accuracy of the downstream estimators by making data respect some hard-wired assumptions." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, X will be overwritten. ``copy=False`` can be used to save memory but is unsafe for general use." - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of samples to use for each batch. Only used when calling ``fit``. 
If ``batch_size`` is ``None``, then ``batch_size`` is inferred from the data and set to ``5 * n_features``, to provide a balance between approximation accuracy and memory consumption." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model with X, using minibatches of size batch_size.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Run check_array on X." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Incremental fit with X. 
All of X is processed as a single batch.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\ncheck_input : bool, default=True\n Run check_array on X.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data, where n_samples is the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply dimensionality reduction to X.\n\nX is projected on the first principal components previously extracted\nfrom a training set, using minibatches of size batch_size if X is\nsparse.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data, where n_samples is the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n\nExamples\n--------\n\n>>> import numpy as np\n>>> from sklearn.decomposition import IncrementalPCA\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2],\n... [1, 1], [2, 1], [3, 2]])\n>>> ipca = IncrementalPCA(n_components=2, batch_size=3)\n>>> ipca.fit(X)\nIncrementalPCA(batch_size=3, n_components=2)\n>>> ipca.transform(X) # doctest: +SKIP" - } - ], - "docstring": "Incremental principal components analysis (IPCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of\nthe data, keeping only the most significant singular vectors to\nproject the data to a lower dimensional space. 
The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nDepending on the size of the input data, this algorithm can be much more\nmemory efficient than a PCA, and allows sparse input.\n\nThis algorithm has constant memory complexity, on the order\nof ``batch_size * n_features``, enabling use of np.memmap files without\nloading the entire file into memory. For sparse matrices, the input\nis converted to dense in batches (in order to be able to subtract the\nmean) which avoids storing the entire dense matrix at any one time.\n\nThe computational overhead of each SVD is\n``O(batch_size * n_features ** 2)``, but only 2 * batch_size samples\nremain in memory at a time. There will be ``n_samples / batch_size`` SVD\ncomputations to get the principal components, versus 1 large SVD of\ncomplexity ``O(n_samples * n_features ** 2)`` for PCA.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16\n\nParameters\n----------\nn_components : int, default=None\n Number of components to keep. If ``n_components`` is ``None``,\n then ``n_components`` is set to ``min(n_samples, n_features)``.\n\nwhiten : bool, default=False\n When True (False by default) the ``components_`` vectors are divided\n by ``n_samples`` times ``components_`` to ensure uncorrelated outputs\n with unit component-wise variances.\n\n Whitening will remove some information from the transformed signal\n (the relative variance scales of the components) but can sometimes\n improve the predictive accuracy of the downstream estimators by\n making data respect some hard-wired assumptions.\n\ncopy : bool, default=True\n If False, X will be overwritten. ``copy=False`` can be used to\n save memory but is unsafe for general use.\n\nbatch_size : int, default=None\n The number of samples to use for each batch. Only used when calling\n ``fit``. 
If ``batch_size`` is ``None``, then ``batch_size``\n is inferred from the data and set to ``5 * n_features``, to provide a\n balance between approximation accuracy and memory consumption.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Components with maximum variance.\n\nexplained_variance_ : ndarray of shape (n_components,)\n Variance explained by each of the selected components.\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n If all components are stored, the sum of explained variances is equal\n to 1.0.\n\nsingular_values_ : ndarray of shape (n_components,)\n The singular values corresponding to each of the selected components.\n The singular values are equal to the 2-norms of the ``n_components``\n variables in the lower-dimensional space.\n\nmean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, aggregate over calls to ``partial_fit``.\n\nvar_ : ndarray of shape (n_features,)\n Per-feature empirical variance, aggregate over calls to\n ``partial_fit``.\n\nnoise_variance_ : float\n The estimated noise covariance following the Probabilistic PCA model\n from Tipping and Bishop 1999. See \"Pattern Recognition and\n Machine Learning\" by C. Bishop, 12.2.1 p. 574 or\n http://www.miketipping.com/papers/met-mppca.pdf.\n\nn_components_ : int\n The estimated number of components. Relevant when\n ``n_components=None``.\n\nn_samples_seen_ : int\n The number of samples processed by the estimator. 
Will be reset on\n new calls to fit, but increments across ``partial_fit`` calls.\n\nbatch_size_ : int\n Inferred batch size from ``batch_size``.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import IncrementalPCA\n>>> from scipy import sparse\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = IncrementalPCA(n_components=7, batch_size=200)\n>>> # either partially fit on smaller batches of data\n>>> transformer.partial_fit(X[:100, :])\nIncrementalPCA(batch_size=200, n_components=7)\n>>> # or let the fit function itself divide the data into batches\n>>> X_sparse = sparse.csr_matrix(X)\n>>> X_transformed = transformer.fit_transform(X_sparse)\n>>> X_transformed.shape\n(1797, 7)\n\nNotes\n-----\nImplements the incremental PCA model from:\n*D. Ross, J. Lim, R. Lin, M. Yang, Incremental Learning for Robust Visual\nTracking, International Journal of Computer Vision, Volume 77, Issue 1-3,\npp. 125-141, May 2008.*\nSee https://www.cs.toronto.edu/~dross/ivt/RossLimLinYang_ijcv.pdf\n\nThis model is an extension of the Sequential Karhunen-Loeve Transform from:\n*A. Levy and M. Lindenbaum, Sequential Karhunen-Loeve Basis Extraction and\nits Application to Images, IEEE Transactions on Image Processing, Volume 9,\nNumber 8, pp. 1371-1374, August 2000.*\nSee https://www.cs.technion.ac.il/~mic/doc/skl-ip.pdf\n\nWe have specifically abstained from an optimization used by authors of both\npapers, a QR decomposition used in specific situations to reduce the\nalgorithmic complexity of the SVD. The source for this technique is\n*Matrix Computations, Third Edition, G. Holub and C. Van Loan, Chapter 5,\nsection 5.4.4, pp 252-253.*. This technique has been omitted because it is\nadvantageous only when decomposing a matrix with ``n_samples`` (rows)\n>= 5/3 * ``n_features`` (columns), and hurts the readability of the\nimplemented algorithm. 
This would be a good opportunity for future\noptimization, if it is deemed necessary.\n\nReferences\n----------\nD. Ross, J. Lim, R. Lin, M. Yang. Incremental Learning for Robust Visual\nTracking, International Journal of Computer Vision, Volume 77,\nIssue 1-3, pp. 125-141, May 2008.\n\nG. Golub and C. Van Loan. Matrix Computations, Third Edition, Chapter 5,\nSection 5.4.4, pp. 252-253.\n\nSee Also\n--------\nPCA\nKernelPCA\nSparsePCA\nTruncatedSVD" - } - ], - "functions": [] - }, - { - "name": "sklearn.decomposition._kernel_pca", - "imports": [ - "import numpy as np", - "from scipy import linalg", - "from scipy.sparse.linalg import eigsh", - "from utils._arpack import _init_arpack_v0", - "from utils.extmath import svd_flip", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_psd_eigenvalues", - "from utils.deprecation import deprecated", - "from exceptions import NotFittedError", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from preprocessing import KernelCenterer", - "from metrics.pairwise import pairwise_kernels", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "KernelPCA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components. If None, all non-zero components are kept." - }, - { - "name": "kernel", - "type": "Literal['linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed']", - "hasDefault": true, - "default": "'linear'", - "limitation": null, - "ignored": false, - "docstring": "Kernel used for PCA." - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Kernel coefficient for rbf, poly and sigmoid kernels. 
Ignored by other kernels. If ``gamma`` is ``None``, then it is set to ``1/n_features``." - }, - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Degree for poly kernels. Ignored by other kernels." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Independent term in poly and sigmoid kernels. Ignored by other kernels." - }, - { - "name": "kernel_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters (keyword arguments) and values for kernel passed as callable object. Ignored by other kernels." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Hyperparameter of the ridge regression that learns the inverse transform (when fit_inverse_transform=True)." - }, - { - "name": "fit_inverse_transform", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Learn the inverse transform for non-precomputed kernels. (i.e. learn to find the pre-image of a point)" - }, - { - "name": "eigen_solver", - "type": "Literal['auto', 'dense', 'arpack']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Select eigensolver to use. If n_components is much less than the number of training samples, arpack may be more efficient than the dense eigensolver." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for arpack. If None, optimal value will be chosen by arpack." - }, - { - "name": "remove_zero_eig", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, then all components with zero eigenvalues are removed, so that the number of components in the output may be < n_components (and sometimes even zero due to numerical instability). When n_components is None, this parameter is ignored and components with zero eigenvalues are removed regardless." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``eigen_solver`` == 'arpack'. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `. .. versionadded:: 0.18" - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, input X is copied and stored by the model in the `X_fit_` attribute. If no further changes will be done to X, setting `copy_X=False` saves memory by storing a reference. .. versionadded:: 0.18" - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. 
versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit's using kernel K" - }, - { - "name": "_fit_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the instance itself." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model from data in X and transform X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)" - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X back to original space.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_components)\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_features)\n\nReferences\n----------\n\"Learning to Find Pre-Images\", G BakIr et al, 2004." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Kernel Principal component analysis (KPCA).\n\nNon-linear dimensionality reduction through the use of kernels (see\n:ref:`metrics`).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of components. 
If None, all non-zero components are kept.\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'}, default='linear'\n Kernel used for PCA.\n\ngamma : float, default=None\n Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other\n kernels. If ``gamma`` is ``None``, then it is set to ``1/n_features``.\n\ndegree : int, default=3\n Degree for poly kernels. Ignored by other kernels.\n\ncoef0 : float, default=1\n Independent term in poly and sigmoid kernels.\n Ignored by other kernels.\n\nkernel_params : dict, default=None\n Parameters (keyword arguments) and\n values for kernel passed as callable object.\n Ignored by other kernels.\n\nalpha : float, default=1.0\n Hyperparameter of the ridge regression that learns the\n inverse transform (when fit_inverse_transform=True).\n\nfit_inverse_transform : bool, default=False\n Learn the inverse transform for non-precomputed kernels.\n (i.e. learn to find the pre-image of a point)\n\neigen_solver : {'auto', 'dense', 'arpack'}, default='auto'\n Select eigensolver to use. If n_components is much less than\n the number of training samples, arpack may be more efficient\n than the dense eigensolver.\n\ntol : float, default=0\n Convergence tolerance for arpack.\n If 0, optimal value will be chosen by arpack.\n\nmax_iter : int, default=None\n Maximum number of iterations for arpack.\n If None, optimal value will be chosen by arpack.\n\nremove_zero_eig : bool, default=False\n If True, then all components with zero eigenvalues are removed, so\n that the number of components in the output may be < n_components\n (and sometimes even zero due to numerical instability).\n When n_components is None, this parameter is ignored and components\n with zero eigenvalues are removed regardless.\n\nrandom_state : int, RandomState instance or None, default=None\n Used when ``eigen_solver`` == 'arpack'. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\n .. 
versionadded:: 0.18\n\ncopy_X : bool, default=True\n If True, input X is copied and stored by the model in the `X_fit_`\n attribute. If no further changes will be done to X, setting\n `copy_X=False` saves memory by storing a reference.\n\n .. versionadded:: 0.18\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.18\n\nAttributes\n----------\nlambdas_ : ndarray of shape (n_components,)\n Eigenvalues of the centered kernel matrix in decreasing order.\n If `n_components` and `remove_zero_eig` are not set,\n then all values are stored.\n\nalphas_ : ndarray of shape (n_samples, n_components)\n Eigenvectors of the centered kernel matrix. If `n_components` and\n `remove_zero_eig` are not set, then all components are stored.\n\ndual_coef_ : ndarray of shape (n_samples, n_features)\n Inverse transform matrix. Only available when\n ``fit_inverse_transform`` is True.\n\nX_transformed_fit_ : ndarray of shape (n_samples, n_components)\n Projection of the fitted data on the kernel principal components.\n Only available when ``fit_inverse_transform`` is True.\n\nX_fit_ : ndarray of shape (n_samples, n_features)\n The data used to fit the model. If `copy_X=False`, then `X_fit_` is\n a reference. This attribute is used for the calls to transform.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import KernelPCA\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = KernelPCA(n_components=7, kernel='linear')\n>>> X_transformed = transformer.fit_transform(X)\n>>> X_transformed.shape\n(1797, 7)\n\nReferences\n----------\nKernel PCA was introduced in:\n Bernhard Schoelkopf, Alexander J. Smola,\n and Klaus-Robert Mueller. 1999. Kernel principal\n component analysis. In Advances in kernel methods,\n MIT Press, Cambridge, MA, USA 327-352." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.decomposition._lda", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "from scipy.special import gammaln", - "from scipy.special import logsumexp", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_random_state", - "from utils import gen_batches", - "from utils import gen_even_slices", - "from utils.validation import check_non_negative", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from _online_lda_fast import mean_change", - "from _online_lda_fast import _dirichlet_expectation_1d", - "from _online_lda_fast import _dirichlet_expectation_2d" - ], - "classes": [ - { - "name": "LatentDirichletAllocation", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of topics. .. versionchanged:: 0.19 ``n_topics`` was renamed to ``n_components``" - }, - { - "name": "doc_topic_prior", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prior of document topic distribution `theta`. If the value is None, defaults to `1 / n_components`. In [1]_, this is called `alpha`." - }, - { - "name": "topic_word_prior", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prior of topic word distribution `beta`. If the value is None, defaults to `1 / n_components`. In [1]_, this is called `eta`." 
- }, - { - "name": "learning_method", - "type": "Literal['batch', 'online']", - "hasDefault": true, - "default": "'batch'", - "limitation": null, - "ignored": false, - "docstring": "Method used to update `_component`. Only used in :meth:`fit` method. In general, if the data size is large, the online update will be much faster than the batch update. Valid options:: 'batch': Batch variational Bayes method. Use all training data in each EM update. Old `components_` will be overwritten in each iteration. 'online': Online variational Bayes method. In each EM update, use mini-batch of training data to update the ``components_`` variable incrementally. The learning rate is controlled by the ``learning_decay`` and the ``learning_offset`` parameters. .. versionchanged:: 0.20 The default learning method is now ``\"batch\"``." - }, - { - "name": "learning_decay", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "It is a parameter that control learning rate in the online learning method. The value should be set between (0.5, 1.0] to guarantee asymptotic convergence. When the value is 0.0 and batch_size is ``n_samples``, the update method is same as batch learning. In the literature, this is called kappa." - }, - { - "name": "learning_offset", - "type": "float", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "A (positive) parameter that downweights early iterations in online learning. It should be greater than 1.0. In the literature, this is called tau_0." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "128", - "limitation": null, - "ignored": false, - "docstring": "Number of documents to use in each EM iteration. Only used in online learning." 
- }, - { - "name": "evaluate_every", - "type": "int", - "hasDefault": true, - "default": "-1", - "limitation": null, - "ignored": false, - "docstring": "How often to evaluate perplexity. Only used in `fit` method. set it to 0 or negative number to not evaluate perplexity in training at all. Evaluating perplexity can help you check convergence in training process, but it will also increase total training time. Evaluating perplexity in every iteration might increase training time up to two-fold." - }, - { - "name": "total_samples", - "type": "int", - "hasDefault": true, - "default": "1e6", - "limitation": null, - "ignored": false, - "docstring": "Total number of documents. Only used in the :meth:`partial_fit` method." - }, - { - "name": "perp_tol", - "type": "float", - "hasDefault": true, - "default": "1e-1", - "limitation": null, - "ignored": false, - "docstring": "Perplexity tolerance in batch learning. Only used when ``evaluate_every`` is greater than 0." - }, - { - "name": "mean_change_tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Stopping tolerance for updating document topic distribution in E-step." - }, - { - "name": "max_doc_update_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Max number of iterations for updating document topic distribution in the E-step." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use in the E-step. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check model parameters." - }, - { - "name": "_init_latent_vars", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialize latent variables." - }, - { - "name": "_e_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - }, - { - "name": "cal_sstats", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter that indicate whether to calculate sufficient statistics or not. Set ``cal_sstats`` to True when we need to run M-step." - }, - { - "name": "random_init", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter that indicate whether to initialize document topic distribution randomly in the E-step. Set it to True in training steps." - }, - { - "name": "parallel", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pre-initialized instance of joblib.Parallel." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "E-step in EM update.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ncal_sstats : bool\n Parameter that indicate whether to calculate sufficient statistics\n or not. 
Set ``cal_sstats`` to True when we need to run M-step.\n\nrandom_init : bool\n Parameter that indicate whether to initialize document topic\n distribution randomly in the E-step. Set it to True in training\n steps.\n\nparallel : joblib.Parallel, default=None\n Pre-initialized instance of joblib.Parallel.\n\nReturns\n-------\n(doc_topic_distr, suff_stats) :\n `doc_topic_distr` is unnormalized topic distribution for each\n document. In the literature, this is called `gamma`.\n `suff_stats` is expected sufficient statistics for the M-step.\n When `cal_sstats == False`, it will be None." - }, - { - "name": "_em_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - }, - { - "name": "total_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Total number of documents. It is only used when batch_update is `False`." - }, - { - "name": "batch_update", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter that controls updating method. `True` for batch learning, `False` for online learning." - }, - { - "name": "parallel", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pre-initialized instance of joblib.Parallel" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "EM update for 1 iteration.\n\nupdate `_component` by batch VB or online VB.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ntotal_samples : int\n Total number of documents. 
It is only used when\n batch_update is `False`.\n\nbatch_update : bool\n Parameter that controls updating method.\n `True` for batch learning, `False` for online learning.\n\nparallel : joblib.Parallel, default=None\n Pre-initialized instance of joblib.Parallel\n\nReturns\n-------\ndoc_topic_distr : ndarray of shape (n_samples, n_components)\n Unnormalized document topic distribution." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_non_neg_array", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "check X format\n\ncheck X format and make sure no negative value in X.\n\nParameters\n----------\nX : array-like or sparse matrix" - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Online VB with Mini-Batch update.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ny : Ignored\n\nReturns\n-------\nself" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn model for the data X with variational Bayes method.\n\nWhen `learning_method` is 'online', use mini-batch update.\nOtherwise, use batch update.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ny : Ignored\n\nReturns\n-------\nself" - }, - { - "name": "_unnormalized_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform data X according to fitted model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\nReturns\n-------\ndoc_topic_distr : ndarray of shape (n_samples, n_components)\n Document topic distribution for X." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform data X according to the fitted model.\n\n .. versionchanged:: 0.18\n *doc_topic_distr* is now normalized\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\nReturns\n-------\ndoc_topic_distr : ndarray of shape (n_samples, n_components)\n Document topic distribution for X." 
- }, - { - "name": "_approx_bound", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - }, - { - "name": "doc_topic_distr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document topic distribution. In the literature, this is called gamma." - }, - { - "name": "sub_sampling", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Compensate for subsampling of documents. It is used in calculate bound in online learning." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the variational bound.\n\nEstimate the variational bound over \"all documents\" using only the\ndocuments passed in as X. Since log-likelihood of each word cannot\nbe computed directly, we use this bound to estimate it.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ndoc_topic_distr : ndarray of shape (n_samples, n_components)\n Document topic distribution. In the literature, this is called\n gamma.\n\nsub_sampling : bool, default=False\n Compensate for subsampling of documents.\n It is used in calculate bound in online learning.\n\nReturns\n-------\nscore : float" - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate approximate log-likelihood as score.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ny : Ignored\n\nReturns\n-------\nscore : float\n Use approximate bound as score." - }, - { - "name": "_perplexity_precomp_distr", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - }, - { - "name": "doc_topic_distr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document topic distribution. If it is None, it will be generated by applying transform on X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate approximate perplexity for data X with ability to accept\nprecomputed doc_topic_distr\n\nPerplexity is defined as exp(-1. * log-likelihood per word)\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ndoc_topic_distr : ndarray of shape (n_samples, n_components), default=None\n Document topic distribution.\n If it is None, it will be generated by applying transform on X.\n\nReturns\n-------\nscore : float\n Perplexity score." - }, - { - "name": "perplexity", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - }, - { - "name": "sub_sampling", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Do sub-sampling or not." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate approximate perplexity for data X.\n\nPerplexity is defined as exp(-1. * log-likelihood per word)\n\n.. versionchanged:: 0.19\n *doc_topic_distr* argument has been deprecated and is ignored\n because user no longer has access to unnormalized distribution\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\nsub_sampling : bool\n Do sub-sampling or not.\n\nReturns\n-------\nscore : float\n Perplexity score." - } - ], - "docstring": "Latent Dirichlet Allocation with online variational Bayes algorithm\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=10\n Number of topics.\n\n .. versionchanged:: 0.19\n ``n_topics`` was renamed to ``n_components``\n\ndoc_topic_prior : float, default=None\n Prior of document topic distribution `theta`. If the value is None,\n defaults to `1 / n_components`.\n In [1]_, this is called `alpha`.\n\ntopic_word_prior : float, default=None\n Prior of topic word distribution `beta`. If the value is None, defaults\n to `1 / n_components`.\n In [1]_, this is called `eta`.\n\nlearning_method : {'batch', 'online'}, default='batch'\n Method used to update `_component`. Only used in :meth:`fit` method.\n In general, if the data size is large, the online update will be much\n faster than the batch update.\n\n Valid options::\n\n 'batch': Batch variational Bayes method. Use all training data in\n each EM update.\n Old `components_` will be overwritten in each iteration.\n 'online': Online variational Bayes method. In each EM update, use\n mini-batch of training data to update the ``components_``\n variable incrementally. The learning rate is controlled by the\n ``learning_decay`` and the ``learning_offset`` parameters.\n\n .. 
versionchanged:: 0.20\n The default learning method is now ``\"batch\"``.\n\nlearning_decay : float, default=0.7\n It is a parameter that control learning rate in the online learning\n method. The value should be set between (0.5, 1.0] to guarantee\n asymptotic convergence. When the value is 0.0 and batch_size is\n ``n_samples``, the update method is same as batch learning. In the\n literature, this is called kappa.\n\nlearning_offset : float, default=10.\n A (positive) parameter that downweights early iterations in online\n learning. It should be greater than 1.0. In the literature, this is\n called tau_0.\n\nmax_iter : int, default=10\n The maximum number of iterations.\n\nbatch_size : int, default=128\n Number of documents to use in each EM iteration. Only used in online\n learning.\n\nevaluate_every : int, default=-1\n How often to evaluate perplexity. Only used in `fit` method.\n set it to 0 or negative number to not evaluate perplexity in\n training at all. Evaluating perplexity can help you check convergence\n in training process, but it will also increase total training time.\n Evaluating perplexity in every iteration might increase training time\n up to two-fold.\n\ntotal_samples : int, default=1e6\n Total number of documents. Only used in the :meth:`partial_fit` method.\n\nperp_tol : float, default=1e-1\n Perplexity tolerance in batch learning. Only used when\n ``evaluate_every`` is greater than 0.\n\nmean_change_tol : float, default=1e-3\n Stopping tolerance for updating document topic distribution in E-step.\n\nmax_doc_update_iter : int, default=100\n Max number of iterations for updating document topic distribution in\n the E-step.\n\nn_jobs : int, default=None\n The number of jobs to use in the E-step.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n Verbosity level.\n\nrandom_state : int, RandomState instance or None, default=None\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Variational parameters for topic word distribution. Since the complete\n conditional for topic word distribution is a Dirichlet,\n ``components_[i, j]`` can be viewed as pseudocount that represents the\n number of times word `j` was assigned to topic `i`.\n It can also be viewed as distribution over the words for each topic\n after normalization:\n ``model.components_ / model.components_.sum(axis=1)[:, np.newaxis]``.\n\nexp_dirichlet_component_ : ndarray of shape (n_components, n_features)\n Exponential value of expectation of log topic word distribution.\n In the literature, this is `exp(E[log(beta)])`.\n\nn_batch_iter_ : int\n Number of iterations of the EM step.\n\nn_iter_ : int\n Number of passes over the dataset.\n\nbound_ : float\n Final perplexity score on training set.\n\ndoc_topic_prior_ : float\n Prior of document topic distribution `theta`. If the value is None,\n it is `1 / n_components`.\n\nrandom_state_ : RandomState instance\n RandomState instance that is generated either from a seed, the random\n number generator or by `np.random`.\n\ntopic_word_prior_ : float\n Prior of topic word distribution `beta`. If the value is None, it is\n `1 / n_components`.\n\nExamples\n--------\n>>> from sklearn.decomposition import LatentDirichletAllocation\n>>> from sklearn.datasets import make_multilabel_classification\n>>> # This produces a feature matrix of token counts, similar to what\n>>> # CountVectorizer would produce on text.\n>>> X, _ = make_multilabel_classification(random_state=0)\n>>> lda = LatentDirichletAllocation(n_components=5,\n... 
random_state=0)\n>>> lda.fit(X)\nLatentDirichletAllocation(...)\n>>> # get topics for some given samples:\n>>> lda.transform(X[-2:])\narray([[0.00360392, 0.25499205, 0.0036211 , 0.64236448, 0.09541846],\n [0.15297572, 0.00362644, 0.44412786, 0.39568399, 0.003586 ]])\n\nReferences\n----------\n.. [1] \"Online Learning for Latent Dirichlet Allocation\", Matthew D.\n Hoffman, David M. Blei, Francis Bach, 2010\n\n[2] \"Stochastic Variational Inference\", Matthew D. Hoffman, David M. Blei,\n Chong Wang, John Paisley, 2013\n\n[3] Matthew D. Hoffman's onlineldavb code. Link:\n https://github.com/blei-lab/onlineldavb" - } - ], - "functions": [ - { - "name": "_update_doc_distribution", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document word matrix." - }, - { - "name": "exp_topic_word_distr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Exponential value of expectation of log topic word distribution. In the literature, this is `exp(E[log(beta)])`." - }, - { - "name": "doc_topic_prior", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Prior of document topic distribution `theta`." - }, - { - "name": "max_iters", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Max number of iterations for updating document topic distribution in the E-step." - }, - { - "name": "mean_change_tol", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Stopping tolerance for updating document topic distribution in E-setp." 
- }, - { - "name": "cal_sstats", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter that indicate to calculate sufficient statistics or not. Set `cal_sstats` to `True` when we need to run M-step." - }, - { - "name": "random_state", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter that indicate how to initialize document topic distribution. Set `random_state` to None will initialize document topic distribution to a constant number." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "E-step: update document-topic distribution.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\nexp_topic_word_distr : ndarray of shape (n_topics, n_features)\n Exponential value of expectation of log topic word distribution.\n In the literature, this is `exp(E[log(beta)])`.\n\ndoc_topic_prior : float\n Prior of document topic distribution `theta`.\n\nmax_iters : int\n Max number of iterations for updating document topic distribution in\n the E-step.\n\nmean_change_tol : float\n Stopping tolerance for updating document topic distribution in E-setp.\n\ncal_sstats : bool\n Parameter that indicate to calculate sufficient statistics or not.\n Set `cal_sstats` to `True` when we need to run M-step.\n\nrandom_state : RandomState instance or None\n Parameter that indicate how to initialize document topic distribution.\n Set `random_state` to None will initialize document topic distribution\n to a constant number.\n\nReturns\n-------\n(doc_topic_distr, suff_stats) :\n `doc_topic_distr` is unnormalized topic distribution for each document.\n In the literature, this is `gamma`. we can calculate `E[log(theta)]`\n from it.\n `suff_stats` is expected sufficient statistics for the M-step.\n When `cal_sstats == False`, this will be None." 
- } - ] - }, - { - "name": "sklearn.decomposition._nmf", - "imports": [ - "import numbers", - "import numpy as np", - "import scipy.sparse as sp", - "import time", - "import warnings", - "from math import sqrt", - "from _cdnmf_fast import _update_cdnmf_fast", - "from _config import config_context", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from exceptions import ConvergenceWarning", - "from utils import check_random_state", - "from utils import check_array", - "from utils.extmath import randomized_svd", - "from utils.extmath import safe_sparse_dot", - "from utils.extmath import squared_norm", - "from utils.validation import check_is_fitted", - "from utils.validation import check_non_negative", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "NMF", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components, if n_components is not set all features are kept." - }, - { - "name": "init", - "type": "Literal['random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Method used to initialize the procedure. Default: None. Valid options: - `None`: 'nndsvd' if n_components <= min(n_samples, n_features), otherwise random. 
- `'random'`: non-negative random matrices, scaled with: sqrt(X.mean() / n_components) - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD) initialization (better for sparseness) - `'nndsvda'`: NNDSVD with zeros filled with the average of X (better when sparsity is not desired) - `'nndsvdar'` NNDSVD with zeros filled with small random values (generally faster, less accurate alternative to NNDSVDa for when sparsity is not desired) - `'custom'`: use custom matrices W and H" - }, - { - "name": "solver", - "type": "Literal['cd', 'mu']", - "hasDefault": true, - "default": "'cd'", - "limitation": null, - "ignored": false, - "docstring": "Numerical solver to use: 'cd' is a Coordinate Descent solver. 'mu' is a Multiplicative Update solver. .. versionadded:: 0.17 Coordinate Descent solver. .. versionadded:: 0.19 Multiplicative Update solver." - }, - { - "name": "beta_loss", - "type": "Literal['frobenius', 'kullback-leibler', 'itakura-saito']", - "hasDefault": true, - "default": "'frobenius'", - "limitation": null, - "ignored": false, - "docstring": "Beta divergence to be minimized, measuring the distance between X and the dot product WH. Note that values different from 'frobenius' (or 2) and 'kullback-leibler' (or 1) lead to significantly slower fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input matrix X cannot contain zeros. Used only in 'mu' solver. .. versionadded:: 0.19" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance of the stopping condition." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations before timing out." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for initialisation (when ``init`` == 'nndsvdar' or 'random'), and in Coordinate Descent. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the regularization terms. Set it to zero to have no regularization. .. versionadded:: 0.17 *alpha* used in the Coordinate Descent solver." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The regularization mixing parameter, with 0 <= l1_ratio <= 1. For l1_ratio = 0 the penalty is an elementwise L2 penalty (aka Frobenius Norm). For l1_ratio = 1 it is an elementwise L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2. .. versionadded:: 0.17 Regularization parameter *l1_ratio* used in the Coordinate Descent solver." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Whether to be verbose." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, randomize the order of coordinates in the CD solver. .. versionadded:: 0.17 *shuffle* parameter used in the Coordinate Descent solver." - }, - { - "name": "regularization", - "type": "Optional[Literal['both', 'components', 'transformation']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Select whether the regularization affects the components (H), the transformation (W), both or none of them. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix to be decomposed" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - }, - { - "name": "W", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If init='custom', it is used as initial guess for the solution." - }, - { - "name": "H", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If init='custom', it is used as initial guess for the solution." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Learn a NMF model for the data X and returns the transformed data.\n\nThis is more efficient than calling fit followed by transform.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data matrix to be decomposed\n\ny : Ignored\n\nW : array-like of shape (n_samples, n_components)\n If init='custom', it is used as initial guess for the solution.\n\nH : array-like of shape (n_components, n_features)\n If init='custom', it is used as initial guess for the solution.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n Transformed data." 
- }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix to be decomposed" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn a NMF model for the data X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data matrix to be decomposed\n\ny : Ignored\n\nReturns\n-------\nself" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix to be transformed by the model." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform the data X according to the fitted NMF model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data matrix to be transformed by the model.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n Transformed data." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "W", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Transformed data matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform data back to its original space.\n\nParameters\n----------\nW : {ndarray, sparse matrix} of shape (n_samples, n_components)\n Transformed data matrix.\n\nReturns\n-------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Data matrix of original shape.\n\n.. 
versionadded:: 0.18" - } - ], - "docstring": "Non-Negative Matrix Factorization (NMF).\n\nFind two non-negative matrices (W, H) whose product approximates the non-\nnegative matrix X. This factorization can be used for example for\ndimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n .. math::\n\n 0.5 * ||X - WH||_{Fro}^2 + alpha * l1_{ratio} * ||vec(W)||_1\n\n + alpha * l1_{ratio} * ||vec(H)||_1\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||W||_{Fro}^2\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nFor multiplicative-update ('mu') solver, the Frobenius norm\n(:math:`0.5 * ||X - WH||_{Fro}^2`) can be changed into another\nbeta-divergence loss, by changing the beta_loss parameter.\n\nThe objective function is minimized with an alternating minimization of W\nand H.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of components, if n_components is not set all features\n are kept.\n\ninit : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n Method used to initialize the procedure.\n Default: None.\n Valid options:\n\n - `None`: 'nndsvd' if n_components <= min(n_samples, n_features),\n otherwise random.\n\n - `'random'`: non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n - `'nndsvda'`: NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n - `'nndsvdar'` NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n - `'custom'`: use custom matrices W and H\n\nsolver : {'cd', 'mu'}, default='cd'\n Numerical solver to use:\n 'cd' is a 
Coordinate Descent solver.\n 'mu' is a Multiplicative Update solver.\n\n .. versionadded:: 0.17\n Coordinate Descent solver.\n\n .. versionadded:: 0.19\n Multiplicative Update solver.\n\nbeta_loss : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}, default='frobenius'\n Beta divergence to be minimized, measuring the distance between X\n and the dot product WH. Note that values different from 'frobenius'\n (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n matrix X cannot contain zeros. Used only in 'mu' solver.\n\n .. versionadded:: 0.19\n\ntol : float, default=1e-4\n Tolerance of the stopping condition.\n\nmax_iter : int, default=200\n Maximum number of iterations before timing out.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for initialisation (when ``init`` == 'nndsvdar' or\n 'random'), and in Coordinate Descent. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\nalpha : float, default=0.\n Constant that multiplies the regularization terms. Set it to zero to\n have no regularization.\n\n .. versionadded:: 0.17\n *alpha* used in the Coordinate Descent solver.\n\nl1_ratio : float, default=0.\n The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n For l1_ratio = 0 the penalty is an elementwise L2 penalty\n (aka Frobenius Norm).\n For l1_ratio = 1 it is an elementwise L1 penalty.\n For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\n .. versionadded:: 0.17\n Regularization parameter *l1_ratio* used in the Coordinate Descent\n solver.\n\nverbose : int, default=0\n Whether to be verbose.\n\nshuffle : bool, default=False\n If true, randomize the order of coordinates in the CD solver.\n\n .. 
versionadded:: 0.17\n *shuffle* parameter used in the Coordinate Descent solver.\n\nregularization : {'both', 'components', 'transformation', None}, default='both'\n Select whether the regularization affects the components (H), the\n transformation (W), both or none of them.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Factorization matrix, sometimes called 'dictionary'.\n\nn_components_ : int\n The number of components. It is same as the `n_components` parameter\n if it was given. Otherwise, it will be same as the number of\n features.\n\nreconstruction_err_ : float\n Frobenius norm of the matrix difference, or beta-divergence, between\n the training data ``X`` and the reconstructed data ``WH`` from\n the fitted model.\n\nn_iter_ : int\n Actual number of iterations.\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n>>> from sklearn.decomposition import NMF\n>>> model = NMF(n_components=2, init='random', random_state=0)\n>>> W = model.fit_transform(X)\n>>> H = model.components_\n\nReferences\n----------\nCichocki, Andrzej, and P. H. A. N. Anh-Huy. \"Fast local algorithms for\nlarge scale nonnegative matrix and tensor factorizations.\"\nIEICE transactions on fundamentals of electronics, communications and\ncomputer sciences 92.3: 708-721, 2009.\n\nFevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix\nfactorization with the beta-divergence. Neural Computation, 23(9)." - } - ], - "functions": [ - { - "name": "norm", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Vector for which to compute the norm." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Dot product-based Euclidean norm implementation.\n\nSee: http://fseoane.net/blog/2011/computing-the-vector-norm/\n\nParameters\n----------\nx : array-like\n Vector for which to compute the norm." - }, - { - "name": "trace_dot", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "First matrix." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Second matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Trace of np.dot(X, Y.T).\n\nParameters\n----------\nX : array-like\n First matrix.\nY : array-like\n Second matrix." - }, - { - "name": "_check_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_beta_divergence", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Union[ArrayLike, float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "W", - "type": "Union[ArrayLike, float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "H", - "type": "Union[ArrayLike, float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "beta", - "type": "Literal['frobenius', 'kullback-leibler', 'itakura-saito']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter of the beta-divergence. If beta == 2, this is half the Frobenius *squared* norm. If beta == 1, this is the generalized Kullback-Leibler divergence. If beta == 0, this is the Itakura-Saito divergence. 
Else, this is the general beta-divergence." - }, - { - "name": "square_root", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, return np.sqrt(2 * res) For beta == 2, it corresponds to the Frobenius norm." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the beta-divergence of X and dot(W, H).\n\nParameters\n----------\nX : float or array-like of shape (n_samples, n_features)\n\nW : float or array-like of shape (n_samples, n_components)\n\nH : float or array-like of shape (n_components, n_features)\n\nbeta : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}\n Parameter of the beta-divergence.\n If beta == 2, this is half the Frobenius *squared* norm.\n If beta == 1, this is the generalized Kullback-Leibler divergence.\n If beta == 0, this is the Itakura-Saito divergence.\n Else, this is the general beta-divergence.\n\nsquare_root : bool, default=False\n If True, return np.sqrt(2 * res)\n For beta == 2, it corresponds to the Frobenius norm.\n\nReturns\n-------\n res : float\n Beta divergence of X and np.dot(X, H)." - }, - { - "name": "_special_sparse_dot", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes np.dot(W, H), only where X is non zero." - }, - { - "name": "_compute_regularization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute L1 and L2 regularization coefficients for W and H." - }, - { - "name": "_check_string_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_beta_loss_to_float", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convert string beta_loss to float." 
- }, - { - "name": "_initialize_nmf", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix to be decomposed." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of components desired in the approximation." - }, - { - "name": "init", - "type": "Literal['random', 'nndsvd', 'nndsvda', 'nndsvdar']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Method used to initialize the procedure. Default: None. Valid options: - None: 'nndsvd' if n_components <= min(n_samples, n_features), otherwise 'random'. - 'random': non-negative random matrices, scaled with: sqrt(X.mean() / n_components) - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD) initialization (better for sparseness) - 'nndsvda': NNDSVD with zeros filled with the average of X (better when sparsity is not desired) - 'nndsvdar': NNDSVD with zeros filled with small random values (generally faster, less accurate alternative to NNDSVDa for when sparsity is not desired) - 'custom': use custom matrices W and H" - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Truncate all values less then this in output to zero." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``init`` == 'nndsvdar' or 'random'. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Algorithms for NMF initialization.\n\nComputes an initial guess for the non-negative\nrank k matrix approximation for X: X = WH.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix to be decomposed.\n\nn_components : int\n The number of components desired in the approximation.\n\ninit : {'random', 'nndsvd', 'nndsvda', 'nndsvdar'}, default=None\n Method used to initialize the procedure.\n Default: None.\n Valid options:\n\n - None: 'nndsvd' if n_components <= min(n_samples, n_features),\n otherwise 'random'.\n\n - 'random': non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n - 'nndsvda': NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n - 'nndsvdar': NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n - 'custom': use custom matrices W and H\n\neps : float, default=1e-6\n Truncate all values less then this in output to zero.\n\nrandom_state : int, RandomState instance or None, default=None\n Used when ``init`` == 'nndsvdar' or 'random'. Pass an int for\n reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nW : array-like of shape (n_samples, n_components)\n Initial guesses for solving X ~= WH.\n\nH : array-like of shape (n_components, n_features)\n Initial guesses for solving X ~= WH.\n\nReferences\n----------\nC. Boutsidis, E. 
Gallopoulos: SVD based initialization: A head start for\nnonnegative matrix factorization - Pattern Recognition, 2008\nhttp://tinyurl.com/nndsvd" - }, - { - "name": "_update_coordinate_descent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper function for _fit_coordinate_descent.\n\nUpdate W to minimize the objective function, iterating once over all\ncoordinates. By symmetry, to update H, one can call\n_update_coordinate_descent(X.T, Ht, W, ...)." - }, - { - "name": "_fit_coordinate_descent", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Constant matrix." - }, - { - "name": "W", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial guess for the solution." - }, - { - "name": "H", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial guess for the solution." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance of the stopping condition." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations before timing out." - }, - { - "name": "l1_reg_W", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L1 regularization parameter for W." - }, - { - "name": "l1_reg_H", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L1 regularization parameter for H." 
- }, - { - "name": "l2_reg_W", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L2 regularization parameter for W." - }, - { - "name": "l2_reg_H", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L2 regularization parameter for H." - }, - { - "name": "update_H", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to True, both W and H will be estimated from initial guesses. Set to False, only W will be estimated." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, randomize the order of coordinates in the CD solver." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to randomize the coordinates in the CD solver, when ``shuffle`` is set to ``True``. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Non-negative Matrix Factorization (NMF) with Coordinate Descent\n\nThe objective function is minimized with an alternating minimization of W\nand H. 
Each minimization is done with a cyclic (up to a permutation of the\nfeatures) Coordinate Descent.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Constant matrix.\n\nW : array-like of shape (n_samples, n_components)\n Initial guess for the solution.\n\nH : array-like of shape (n_components, n_features)\n Initial guess for the solution.\n\ntol : float, default=1e-4\n Tolerance of the stopping condition.\n\nmax_iter : int, default=200\n Maximum number of iterations before timing out.\n\nl1_reg_W : float, default=0.\n L1 regularization parameter for W.\n\nl1_reg_H : float, default=0.\n L1 regularization parameter for H.\n\nl2_reg_W : float, default=0.\n L2 regularization parameter for W.\n\nl2_reg_H : float, default=0.\n L2 regularization parameter for H.\n\nupdate_H : bool, default=True\n Set to True, both W and H will be estimated from initial guesses.\n Set to False, only W will be estimated.\n\nverbose : int, default=0\n The verbosity level.\n\nshuffle : bool, default=False\n If true, randomize the order of coordinates in the CD solver.\n\nrandom_state : int, RandomState instance or None, default=None\n Used to randomize the coordinates in the CD solver, when\n ``shuffle`` is set to ``True``. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n Solution to the non-negative least squares problem.\n\nH : ndarray of shape (n_components, n_features)\n Solution to the non-negative least squares problem.\n\nn_iter : int\n The number of iterations done by the algorithm.\n\nReferences\n----------\nCichocki, Andrzej, and Phan, Anh-Huy. \"Fast local algorithms for\nlarge scale nonnegative matrix and tensor factorizations.\"\nIEICE transactions on fundamentals of electronics, communications and\ncomputer sciences 92.3: 708-721, 2009." 
- }, - { - "name": "_multiplicative_update_w", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update W in Multiplicative Update NMF." - }, - { - "name": "_multiplicative_update_h", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update H in Multiplicative Update NMF." - }, - { - "name": "_fit_multiplicative_update", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Constant input matrix." - }, - { - "name": "W", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial guess for the solution." - }, - { - "name": "H", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial guess for the solution." - }, - { - "name": "beta_loss", - "type": "Literal['frobenius', 'kullback-leibler', 'itakura-saito']", - "hasDefault": true, - "default": "'frobenius'", - "limitation": null, - "ignored": false, - "docstring": "String must be in {'frobenius', 'kullback-leibler', 'itakura-saito'}. Beta divergence to be minimized, measuring the distance between X and the dot product WH. Note that values different from 'frobenius' (or 2) and 'kullback-leibler' (or 1) lead to significantly slower fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input matrix X cannot contain zeros." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance of the stopping condition." 
- }, - { - "name": "l1_reg_W", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L1 regularization parameter for W." - }, - { - "name": "l1_reg_H", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L1 regularization parameter for H." - }, - { - "name": "l2_reg_W", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L2 regularization parameter for W." - }, - { - "name": "l2_reg_H", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L2 regularization parameter for H." - }, - { - "name": "update_H", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to True, both W and H will be estimated from initial guesses. Set to False, only W will be estimated." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Non-negative Matrix Factorization with Multiplicative Update.\n\nThe objective function is _beta_divergence(X, WH) and is minimized with an\nalternating minimization of W and H. 
Each minimization is done with a\nMultiplicative Update.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Constant input matrix.\n\nW : array-like of shape (n_samples, n_components)\n Initial guess for the solution.\n\nH : array-like of shape (n_components, n_features)\n Initial guess for the solution.\n\nbeta_loss : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}, default='frobenius'\n String must be in {'frobenius', 'kullback-leibler', 'itakura-saito'}.\n Beta divergence to be minimized, measuring the distance between X\n and the dot product WH. Note that values different from 'frobenius'\n (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n matrix X cannot contain zeros.\n\nmax_iter : int, default=200\n Number of iterations.\n\ntol : float, default=1e-4\n Tolerance of the stopping condition.\n\nl1_reg_W : float, default=0.\n L1 regularization parameter for W.\n\nl1_reg_H : float, default=0.\n L1 regularization parameter for H.\n\nl2_reg_W : float, default=0.\n L2 regularization parameter for W.\n\nl2_reg_H : float, default=0.\n L2 regularization parameter for H.\n\nupdate_H : bool, default=True\n Set to True, both W and H will be estimated from initial guesses.\n Set to False, only W will be estimated.\n\nverbose : int, default=0\n The verbosity level.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n Solution to the non-negative least squares problem.\n\nH : ndarray of shape (n_components, n_features)\n Solution to the non-negative least squares problem.\n\nn_iter : int\n The number of iterations done by the algorithm.\n\nReferences\n----------\nFevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix\nfactorization with the beta-divergence. Neural Computation, 23(9)." 
- }, - { - "name": "non_negative_factorization", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Constant matrix." - }, - { - "name": "W", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If init='custom', it is used as initial guess for the solution." - }, - { - "name": "H", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If init='custom', it is used as initial guess for the solution. If update_H=False, it is used as a constant, to solve for W only." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components, if n_components is not set all features are kept." - }, - { - "name": "init", - "type": "Literal['random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Method used to initialize the procedure. Valid options: - None: 'nndsvd' if n_components < n_features, otherwise 'random'. - 'random': non-negative random matrices, scaled with: sqrt(X.mean() / n_components) - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD) initialization (better for sparseness) - 'nndsvda': NNDSVD with zeros filled with the average of X (better when sparsity is not desired) - 'nndsvdar': NNDSVD with zeros filled with small random values (generally faster, less accurate alternative to NNDSVDa for when sparsity is not desired) - 'custom': use custom matrices W and H if `update_H=True`. If `update_H=False`, then only custom matrix H is used. .. versionchanged:: 0.23 The default value of `init` changed from 'random' to None in 0.23." 
- }, - { - "name": "update_H", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to True, both W and H will be estimated from initial guesses. Set to False, only W will be estimated." - }, - { - "name": "solver", - "type": "Literal['cd', 'mu']", - "hasDefault": true, - "default": "'cd'", - "limitation": null, - "ignored": false, - "docstring": "Numerical solver to use: - 'cd' is a Coordinate Descent solver that uses Fast Hierarchical Alternating Least Squares (Fast HALS). - 'mu' is a Multiplicative Update solver. .. versionadded:: 0.17 Coordinate Descent solver. .. versionadded:: 0.19 Multiplicative Update solver." - }, - { - "name": "beta_loss", - "type": "Literal['frobenius', 'kullback-leibler', 'itakura-saito']", - "hasDefault": true, - "default": "'frobenius'", - "limitation": null, - "ignored": false, - "docstring": "Beta divergence to be minimized, measuring the distance between X and the dot product WH. Note that values different from 'frobenius' (or 2) and 'kullback-leibler' (or 1) lead to significantly slower fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input matrix X cannot contain zeros. Used only in 'mu' solver. .. versionadded:: 0.19" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance of the stopping condition." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations before timing out." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the regularization terms." 
- }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The regularization mixing parameter, with 0 <= l1_ratio <= 1. For l1_ratio = 0 the penalty is an elementwise L2 penalty (aka Frobenius Norm). For l1_ratio = 1 it is an elementwise L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2." - }, - { - "name": "regularization", - "type": "Literal['both', 'components', 'transformation']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Select whether the regularization affects the components (H), the transformation (W), both or none of them." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for NMF initialisation (when ``init`` == 'nndsvdar' or 'random'), and in Coordinate Descent. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, randomize the order of coordinates in the CD solver." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Non-negative Matrix Factorization (NMF).\n\nFind two non-negative matrices (W, H) whose product approximates the non-\nnegative matrix X. This factorization can be used for example for\ndimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n .. 
math::\n\n 0.5 * ||X - WH||_{Fro}^2 + alpha * l1_{ratio} * ||vec(W)||_1\n\n + alpha * l1_{ratio} * ||vec(H)||_1\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||W||_{Fro}^2\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nFor multiplicative-update ('mu') solver, the Frobenius norm\n:math:`(0.5 * ||X - WH||_{Fro}^2)` can be changed into another\nbeta-divergence loss, by changing the beta_loss parameter.\n\nThe objective function is minimized with an alternating minimization of W\nand H. If H is given and update_H=False, it solves for W only.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Constant matrix.\n\nW : array-like of shape (n_samples, n_components), default=None\n If init='custom', it is used as initial guess for the solution.\n\nH : array-like of shape (n_components, n_features), default=None\n If init='custom', it is used as initial guess for the solution.\n If update_H=False, it is used as a constant, to solve for W only.\n\nn_components : int, default=None\n Number of components, if n_components is not set all features\n are kept.\n\ninit : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n Method used to initialize the procedure.\n\n Valid options:\n\n - None: 'nndsvd' if n_components < n_features, otherwise 'random'.\n\n - 'random': non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n - 'nndsvda': NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n - 'nndsvdar': NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n - 'custom': use custom matrices W and H if `update_H=True`. 
If\n `update_H=False`, then only custom matrix H is used.\n\n .. versionchanged:: 0.23\n The default value of `init` changed from 'random' to None in 0.23.\n\nupdate_H : bool, default=True\n Set to True, both W and H will be estimated from initial guesses.\n Set to False, only W will be estimated.\n\nsolver : {'cd', 'mu'}, default='cd'\n Numerical solver to use:\n\n - 'cd' is a Coordinate Descent solver that uses Fast Hierarchical\n Alternating Least Squares (Fast HALS).\n\n - 'mu' is a Multiplicative Update solver.\n\n .. versionadded:: 0.17\n Coordinate Descent solver.\n\n .. versionadded:: 0.19\n Multiplicative Update solver.\n\nbeta_loss : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}, default='frobenius'\n Beta divergence to be minimized, measuring the distance between X\n and the dot product WH. Note that values different from 'frobenius'\n (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n matrix X cannot contain zeros. Used only in 'mu' solver.\n\n .. versionadded:: 0.19\n\ntol : float, default=1e-4\n Tolerance of the stopping condition.\n\nmax_iter : int, default=200\n Maximum number of iterations before timing out.\n\nalpha : float, default=0.\n Constant that multiplies the regularization terms.\n\nl1_ratio : float, default=0.\n The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n For l1_ratio = 0 the penalty is an elementwise L2 penalty\n (aka Frobenius Norm).\n For l1_ratio = 1 it is an elementwise L1 penalty.\n For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\nregularization : {'both', 'components', 'transformation'}, default=None\n Select whether the regularization affects the components (H), the\n transformation (W), both or none of them.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for NMF initialisation (when ``init`` == 'nndsvdar' or\n 'random'), and in Coordinate Descent. 
Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\nverbose : int, default=0\n The verbosity level.\n\nshuffle : bool, default=False\n If true, randomize the order of coordinates in the CD solver.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n Solution to the non-negative least squares problem.\n\nH : ndarray of shape (n_components, n_features)\n Solution to the non-negative least squares problem.\n\nn_iter : int\n Actual number of iterations.\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n>>> from sklearn.decomposition import non_negative_factorization\n>>> W, H, n_iter = non_negative_factorization(X, n_components=2,\n... init='random', random_state=0)\n\nReferences\n----------\nCichocki, Andrzej, and P. H. A. N. Anh-Huy. \"Fast local algorithms for\nlarge scale nonnegative matrix and tensor factorizations.\"\nIEICE transactions on fundamentals of electronics, communications and\ncomputer sciences 92.3: 708-721, 2009.\n\nFevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix\nfactorization with the beta-divergence. Neural Computation, 23(9)." 
- } - ] - }, - { - "name": "sklearn.decomposition._pca", - "imports": [ - "from math import log", - "from math import sqrt", - "import numbers", - "import numpy as np", - "from scipy import linalg", - "from scipy.special import gammaln", - "from scipy.sparse import issparse", - "from scipy.sparse.linalg import svds", - "from _base import _BasePCA", - "from utils import check_random_state", - "from utils._arpack import _init_arpack_v0", - "from utils.extmath import fast_logdet", - "from utils.extmath import randomized_svd", - "from utils.extmath import svd_flip", - "from utils.extmath import stable_cumsum", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "PCA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "Union[float, int, Literal['mle']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components to keep. if n_components is not set all components are kept:: n_components == min(n_samples, n_features) If ``n_components == 'mle'`` and ``svd_solver == 'full'``, Minka's MLE is used to guess the dimension. Use of ``n_components == 'mle'`` will interpret ``svd_solver == 'auto'`` as ``svd_solver == 'full'``. If ``0 < n_components < 1`` and ``svd_solver == 'full'``, select the number of components such that the amount of variance that needs to be explained is greater than the percentage specified by n_components. If ``svd_solver == 'arpack'``, the number of components must be strictly less than the minimum of n_features and n_samples. 
Hence, the None case results in:: n_components == min(n_samples, n_features) - 1" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, data passed to fit are overwritten and running fit(X).transform(X) will not yield the expected results, use fit_transform(X) instead." - }, - { - "name": "whiten", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When True (False by default) the `components_` vectors are multiplied by the square root of n_samples and then divided by the singular values to ensure uncorrelated outputs with unit component-wise variances. Whitening will remove some information from the transformed signal (the relative variance scales of the components) but can sometime improve the predictive accuracy of the downstream estimators by making their data respect some hard-wired assumptions." - }, - { - "name": "svd_solver", - "type": "Literal['auto', 'full', 'arpack', 'randomized']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If auto : The solver is selected by a default policy based on `X.shape` and `n_components`: if the input data is larger than 500x500 and the number of components to extract is lower than 80% of the smallest dimension of the data, then the more efficient 'randomized' method is enabled. Otherwise the exact full SVD is computed and optionally truncated afterwards. If full : run exact full SVD calling the standard LAPACK solver via `scipy.linalg.svd` and select the components by postprocessing If arpack : run SVD truncated to n_components calling ARPACK solver via `scipy.sparse.linalg.svds`. It requires strictly 0 < n_components < min(X.shape) If randomized : run randomized SVD by the method of Halko et al. .. 
versionadded:: 0.18.0" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for singular values computed by svd_solver == 'arpack'. Must be of range [0.0, infinity). .. versionadded:: 0.18.0" - }, - { - "name": "iterated_power", - "type": "Union[Literal['auto'], int]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations for the power method computed by svd_solver == 'randomized'. Must be of range [0, infinity). .. versionadded:: 0.18.0" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when the 'arpack' or 'randomized' solvers are used. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `. .. versionadded:: 0.18.0" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model with X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself." 
- }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model with X and apply the dimensionality reduction on X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Transformed values.\n\nNotes\n-----\nThis method returns a Fortran-ordered array. To convert it to a\nC-ordered array, use 'np.ascontiguousarray'." - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Dispatch to the right submethod depending on the chosen solver." - }, - { - "name": "_fit_full", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model by computing full SVD on X." - }, - { - "name": "_fit_truncated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model by computing truncated SVD (by ARPACK or randomized)\non X." - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the log-likelihood of each sample.\n\nSee. 
\"Pattern Recognition and Machine Learning\"\nby C. Bishop, 12.2.1 p. 574\nor http://www.miketipping.com/papers/met-mppca.pdf\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data.\n\nReturns\n-------\nll : ndarray of shape (n_samples,)\n Log-likelihood of each sample under the current model." - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the average log-likelihood of all samples.\n\nSee. \"Pattern Recognition and Machine Learning\"\nby C. Bishop, 12.2.1 p. 574\nor http://www.miketipping.com/papers/met-mppca.pdf\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data.\n\ny : Ignored\n\nReturns\n-------\nll : float\n Average log-likelihood of the samples under the current model." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Principal component analysis (PCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of the\ndata to project it to a lower dimensional space. The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nIt uses the LAPACK implementation of the full SVD or a randomized truncated\nSVD by the method of Halko et al. 2009, depending on the shape of the input\ndata and the number of components to extract.\n\nIt can also use the scipy.sparse.linalg ARPACK implementation of the\ntruncated SVD.\n\nNotice that this class does not support sparse input. 
See\n:class:`TruncatedSVD` for an alternative with sparse data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, float or 'mle', default=None\n Number of components to keep.\n if n_components is not set all components are kept::\n\n n_components == min(n_samples, n_features)\n\n If ``n_components == 'mle'`` and ``svd_solver == 'full'``, Minka's\n MLE is used to guess the dimension. Use of ``n_components == 'mle'``\n will interpret ``svd_solver == 'auto'`` as ``svd_solver == 'full'``.\n\n If ``0 < n_components < 1`` and ``svd_solver == 'full'``, select the\n number of components such that the amount of variance that needs to be\n explained is greater than the percentage specified by n_components.\n\n If ``svd_solver == 'arpack'``, the number of components must be\n strictly less than the minimum of n_features and n_samples.\n\n Hence, the None case results in::\n\n n_components == min(n_samples, n_features) - 1\n\ncopy : bool, default=True\n If False, data passed to fit are overwritten and running\n fit(X).transform(X) will not yield the expected results,\n use fit_transform(X) instead.\n\nwhiten : bool, default=False\n When True (False by default) the `components_` vectors are multiplied\n by the square root of n_samples and then divided by the singular values\n to ensure uncorrelated outputs with unit component-wise variances.\n\n Whitening will remove some information from the transformed signal\n (the relative variance scales of the components) but can sometime\n improve the predictive accuracy of the downstream estimators by\n making their data respect some hard-wired assumptions.\n\nsvd_solver : {'auto', 'full', 'arpack', 'randomized'}, default='auto'\n If auto :\n The solver is selected by a default policy based on `X.shape` and\n `n_components`: if the input data is larger than 500x500 and the\n number of components to extract is lower than 80% of the smallest\n dimension of the data, then the more efficient 
'randomized'\n method is enabled. Otherwise the exact full SVD is computed and\n optionally truncated afterwards.\n If full :\n run exact full SVD calling the standard LAPACK solver via\n `scipy.linalg.svd` and select the components by postprocessing\n If arpack :\n run SVD truncated to n_components calling ARPACK solver via\n `scipy.sparse.linalg.svds`. It requires strictly\n 0 < n_components < min(X.shape)\n If randomized :\n run randomized SVD by the method of Halko et al.\n\n .. versionadded:: 0.18.0\n\ntol : float, default=0.0\n Tolerance for singular values computed by svd_solver == 'arpack'.\n Must be of range [0.0, infinity).\n\n .. versionadded:: 0.18.0\n\niterated_power : int or 'auto', default='auto'\n Number of iterations for the power method computed by\n svd_solver == 'randomized'.\n Must be of range [0, infinity).\n\n .. versionadded:: 0.18.0\n\nrandom_state : int, RandomState instance or None, default=None\n Used when the 'arpack' or 'randomized' solvers are used. Pass an int\n for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n .. versionadded:: 0.18.0\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Principal axes in feature space, representing the directions of\n maximum variance in the data. The components are sorted by\n ``explained_variance_``.\n\nexplained_variance_ : ndarray of shape (n_components,)\n The amount of variance explained by each of the selected components.\n\n Equal to n_components largest eigenvalues\n of the covariance matrix of X.\n\n .. 
versionadded:: 0.18\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n\n If ``n_components`` is not set then all components are stored and the\n sum of the ratios is equal to 1.0.\n\nsingular_values_ : ndarray of shape (n_components,)\n The singular values corresponding to each of the selected components.\n The singular values are equal to the 2-norms of the ``n_components``\n variables in the lower-dimensional space.\n\n .. versionadded:: 0.19\n\nmean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, estimated from the training set.\n\n Equal to `X.mean(axis=0)`.\n\nn_components_ : int\n The estimated number of components. When n_components is set\n to 'mle' or a number between 0 and 1 (with svd_solver == 'full') this\n number is estimated from input data. Otherwise it equals the parameter\n n_components, or the lesser value of n_features and n_samples\n if n_components is None.\n\nn_features_ : int\n Number of features in the training data.\n\nn_samples_ : int\n Number of samples in the training data.\n\nnoise_variance_ : float\n The estimated noise covariance following the Probabilistic PCA model\n from Tipping and Bishop 1999. See \"Pattern Recognition and\n Machine Learning\" by C. Bishop, 12.2.1 p. 574 or\n http://www.miketipping.com/papers/met-mppca.pdf. It is required to\n compute the estimated data covariance and score samples.\n\n Equal to the average of (min(n_features, n_samples) - n_components)\n smallest eigenvalues of the covariance matrix of X.\n\nSee Also\n--------\nKernelPCA : Kernel Principal Component Analysis.\nSparsePCA : Sparse Principal Component Analysis.\nTruncatedSVD : Dimensionality reduction using truncated SVD.\nIncrementalPCA : Incremental Principal Component Analysis.\n\nReferences\n----------\nFor n_components == 'mle', this class uses the method of *Minka, T. P.\n\"Automatic choice of dimensionality for PCA\". In NIPS, pp. 
598-604*\n\nImplements the probabilistic PCA model from:\nTipping, M. E., and Bishop, C. M. (1999). \"Probabilistic principal\ncomponent analysis\". Journal of the Royal Statistical Society:\nSeries B (Statistical Methodology), 61(3), 611-622.\nvia the score and score_samples methods.\nSee http://www.miketipping.com/papers/met-mppca.pdf\n\nFor svd_solver == 'arpack', refer to `scipy.sparse.linalg.svds`.\n\nFor svd_solver == 'randomized', see:\n*Halko, N., Martinsson, P. G., and Tropp, J. A. (2011).\n\"Finding structure with randomness: Probabilistic algorithms for\nconstructing approximate matrix decompositions\".\nSIAM review, 53(2), 217-288.* and also\n*Martinsson, P. G., Rokhlin, V., and Tygert, M. (2011).\n\"A randomized algorithm for the decomposition of matrices\".\nApplied and Computational Harmonic Analysis, 30(1), 47-68.*\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.decomposition import PCA\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> pca = PCA(n_components=2)\n>>> pca.fit(X)\nPCA(n_components=2)\n>>> print(pca.explained_variance_ratio_)\n[0.9924... 0.0075...]\n>>> print(pca.singular_values_)\n[6.30061... 0.54980...]\n\n>>> pca = PCA(n_components=2, svd_solver='full')\n>>> pca.fit(X)\nPCA(n_components=2, svd_solver='full')\n>>> print(pca.explained_variance_ratio_)\n[0.9924... 0.00755...]\n>>> print(pca.singular_values_)\n[6.30061... 0.54980...]\n\n>>> pca = PCA(n_components=1, svd_solver='arpack')\n>>> pca.fit(X)\nPCA(n_components=1, svd_solver='arpack')\n>>> print(pca.explained_variance_ratio_)\n[0.99244...]\n>>> print(pca.singular_values_)\n[6.30061...]" - } - ], - "functions": [ - { - "name": "_assess_dimension", - "decorators": [], - "parameters": [ - { - "name": "spectrum", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data spectrum." 
- }, - { - "name": "rank", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Tested rank value. It should be strictly lower than n_features, otherwise the method isn't specified (division by zero in equation (31) from the paper)." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the log-likelihood of a rank ``rank`` dataset.\n\nThe dataset is assumed to be embedded in gaussian noise of shape(n,\ndimf) having spectrum ``spectrum``.\n\nParameters\n----------\nspectrum : ndarray of shape (n_features,)\n Data spectrum.\nrank : int\n Tested rank value. It should be strictly lower than n_features,\n otherwise the method isn't specified (division by zero in equation\n (31) from the paper).\nn_samples : int\n Number of samples.\n\nReturns\n-------\nll : float\n The log-likelihood.\n\nNotes\n-----\nThis implements the method of `Thomas P. Minka:\nAutomatic Choice of Dimensionality for PCA. NIPS 2000: 598-604`" - }, - { - "name": "_infer_dimension", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Infers the dimension of a dataset with a given spectrum.\n\nThe returned value will be in [1, n_features - 1]." 
- } - ] - }, - { - "name": "sklearn.decomposition._sparse_pca", - "imports": [ - "import numpy as np", - "from utils import check_random_state", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from linear_model import ridge_regression", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from _dict_learning import dict_learning", - "from _dict_learning import dict_learning_online" - ], - "classes": [ - { - "name": "SparsePCA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of sparse atoms to extract." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Sparsity controlling parameter. Higher values lead to sparser components." - }, - { - "name": "ridge_alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Amount of ridge shrinkage to apply in order to improve conditioning when calling the transform method." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-8", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for the stopping condition." 
- }, - { - "name": "method", - "type": "Literal['lars', 'cd']", - "hasDefault": true, - "default": "'lars'", - "limitation": null, - "ignored": false, - "docstring": "lars: uses the least angle regression method to solve the lasso problem (linear_model.lars_path) cd: uses the coordinate descent method to compute the Lasso solution (linear_model.Lasso). Lars will be faster if the estimated components are sparse." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "U_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial values for the loadings for warm restart scenarios." - }, - { - "name": "V_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial values for the components for warm restart scenarios." - }, - { - "name": "verbose", - "type": "Union[bool, int]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity; the higher, the more messages. Defaults to 0." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used during dictionary learning. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data to be transformed, must have the same number of features as the data used to train the model." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Least Squares projection of the data onto the sparse components.\n\nTo avoid instability issues in case the system is under-determined,\nregularization can be applied (Ridge regression) via the\n`ridge_alpha` parameter.\n\nNote that Sparse PCA components orthogonality is not enforced as in PCA\nhence one cannot use a simple linear projection.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Test data to be transformed, must have the same number of\n features as the data used to train the model.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Transformed data." 
- } - ], - "docstring": "Sparse Principal Components Analysis (SparsePCA).\n\nFinds the set of sparse components that can optimally reconstruct\nthe data. The amount of sparseness is controllable by the coefficient\nof the L1 penalty, given by the parameter alpha.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of sparse atoms to extract.\n\nalpha : float, default=1\n Sparsity controlling parameter. Higher values lead to sparser\n components.\n\nridge_alpha : float, default=0.01\n Amount of ridge shrinkage to apply in order to improve\n conditioning when calling the transform method.\n\nmax_iter : int, default=1000\n Maximum number of iterations to perform.\n\ntol : float, default=1e-8\n Tolerance for the stopping condition.\n\nmethod : {'lars', 'cd'}, default='lars'\n lars: uses the least angle regression method to solve the lasso problem\n (linear_model.lars_path)\n cd: uses the coordinate descent method to compute the\n Lasso solution (linear_model.Lasso). Lars will be faster if\n the estimated components are sparse.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nU_init : ndarray of shape (n_samples, n_components), default=None\n Initial values for the loadings for warm restart scenarios.\n\nV_init : ndarray of shape (n_components, n_features), default=None\n Initial values for the components for warm restart scenarios.\n\nverbose : int or bool, default=False\n Controls the verbosity; the higher, the more messages. Defaults to 0.\n\nrandom_state : int, RandomState instance or None, default=None\n Used during dictionary learning. 
Pass an int for reproducible results\n across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Sparse components extracted from the data.\n\nerror_ : ndarray\n Vector of errors at each iteration.\n\nn_components_ : int\n Estimated number of components.\n\n .. versionadded:: 0.23\n\nn_iter_ : int\n Number of iterations run.\n\nmean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, estimated from the training set.\n Equal to ``X.mean(axis=0)``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.decomposition import SparsePCA\n>>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)\n>>> transformer = SparsePCA(n_components=5, random_state=0)\n>>> transformer.fit(X)\nSparsePCA(...)\n>>> X_transformed = transformer.transform(X)\n>>> X_transformed.shape\n(200, 5)\n>>> # most values in the components_ are zero (sparsity)\n>>> np.mean(transformer.components_ == 0)\n0.9666...\n\nSee Also\n--------\nPCA\nMiniBatchSparsePCA\nDictionaryLearning" - }, - { - "name": "MiniBatchSparsePCA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "number of sparse atoms to extract" - }, - { - "name": "alpha", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Sparsity controlling parameter. Higher values lead to sparser components." - }, - { - "name": "ridge_alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Amount of ridge shrinkage to apply in order to improve conditioning when calling the transform method." 
- }, - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "number of iterations to perform for each mini batch" - }, - { - "name": "callback", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "callable that gets invoked every five iterations" - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "the number of features to take in each mini batch" - }, - { - "name": "verbose", - "type": "Union[bool, int]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity; the higher, the more messages. Defaults to 0." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to shuffle the data before splitting it in batches" - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "method", - "type": "Literal['lars', 'cd']", - "hasDefault": true, - "default": "'lars'", - "limitation": null, - "ignored": false, - "docstring": "lars: uses the least angle regression method to solve the lasso problem (linear_model.lars_path) cd: uses the coordinate descent method to compute the Lasso solution (linear_model.Lasso). Lars will be faster if the estimated components are sparse." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for random shuffling when ``shuffle`` is set to ``True``, during online dictionary learning. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself." - } - ], - "docstring": "Mini-batch Sparse Principal Components Analysis\n\nFinds the set of sparse components that can optimally reconstruct\nthe data. The amount of sparseness is controllable by the coefficient\nof the L1 penalty, given by the parameter alpha.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n number of sparse atoms to extract\n\nalpha : int, default=1\n Sparsity controlling parameter. 
Higher values lead to sparser\n components.\n\nridge_alpha : float, default=0.01\n Amount of ridge shrinkage to apply in order to improve\n conditioning when calling the transform method.\n\nn_iter : int, default=100\n number of iterations to perform for each mini batch\n\ncallback : callable, default=None\n callable that gets invoked every five iterations\n\nbatch_size : int, default=3\n the number of features to take in each mini batch\n\nverbose : int or bool, default=False\n Controls the verbosity; the higher, the more messages. Defaults to 0.\n\nshuffle : bool, default=True\n whether to shuffle the data before splitting it in batches\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nmethod : {'lars', 'cd'}, default='lars'\n lars: uses the least angle regression method to solve the lasso problem\n (linear_model.lars_path)\n cd: uses the coordinate descent method to compute the\n Lasso solution (linear_model.Lasso). Lars will be faster if\n the estimated components are sparse.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for random shuffling when ``shuffle`` is set to ``True``,\n during online dictionary learning. Pass an int for reproducible results\n across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Sparse components extracted from the data.\n\nn_components_ : int\n Estimated number of components.\n\n .. 
versionadded:: 0.23\n\nn_iter_ : int\n Number of iterations run.\n\nmean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, estimated from the training set.\n Equal to ``X.mean(axis=0)``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.decomposition import MiniBatchSparsePCA\n>>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)\n>>> transformer = MiniBatchSparsePCA(n_components=5, batch_size=50,\n... random_state=0)\n>>> transformer.fit(X)\nMiniBatchSparsePCA(...)\n>>> X_transformed = transformer.transform(X)\n>>> X_transformed.shape\n(200, 5)\n>>> # most values in the components_ are zero (sparsity)\n>>> np.mean(transformer.components_ == 0)\n0.94\n\nSee Also\n--------\nPCA\nSparsePCA\nDictionaryLearning" - } - ], - "functions": [] - }, - { - "name": "sklearn.decomposition._truncated_svd", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "from scipy.sparse.linalg import svds", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_array", - "from utils import check_random_state", - "from utils._arpack import _init_arpack_v0", - "from utils.extmath import randomized_svd", - "from utils.extmath import safe_sparse_dot", - "from utils.extmath import svd_flip", - "from utils.sparsefuncs import mean_variance_axis", - "from utils.validation import _deprecate_positional_args", - "from utils.validation import check_is_fitted" - ], - "classes": [ - { - "name": "TruncatedSVD", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Desired dimensionality of output data. Must be strictly less than the number of features. The default value is useful for visualisation. 
For LSA, a value of 100 is recommended." - }, - { - "name": "algorithm", - "type": "Literal['arpack', 'randomized']", - "hasDefault": true, - "default": "'randomized'", - "limitation": null, - "ignored": false, - "docstring": "SVD solver to use. Either \"arpack\" for the ARPACK wrapper in SciPy (scipy.sparse.linalg.svds), or \"randomized\" for the randomized algorithm due to Halko (2009)." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations for randomized SVD solver. Not used by ARPACK. The default is larger than the default in :func:`~sklearn.utils.extmath.randomized_svd` to handle sparse matrices that may have large slowly decaying spectrum." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used during randomized svd. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for ARPACK. 0 means machine precision. Ignored by randomized SVD solver." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit model on training data X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the transformer object." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit model to X and perform dimensionality reduction on X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : Ignored\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Reduced version of X. This will always be a dense array." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform dimensionality reduction on X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Reduced version of X. This will always be a dense array." 
- }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "New data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X back to its original space.\n\nReturns an array X_original whose transform would be X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_components)\n New data.\n\nReturns\n-------\nX_original : ndarray of shape (n_samples, n_features)\n Note that this is always a dense array." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dimensionality reduction using truncated SVD (aka LSA).\n\nThis transformer performs linear dimensionality reduction by means of\ntruncated singular value decomposition (SVD). Contrary to PCA, this\nestimator does not center the data before computing the singular value\ndecomposition. This means it can work with sparse matrices\nefficiently.\n\nIn particular, truncated SVD works on term count/tf-idf matrices as\nreturned by the vectorizers in :mod:`sklearn.feature_extraction.text`. In\nthat context, it is known as latent semantic analysis (LSA).\n\nThis estimator supports two algorithms: a fast randomized SVD solver, and\na \"naive\" algorithm that uses ARPACK as an eigensolver on `X * X.T` or\n`X.T * X`, whichever is more efficient.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=2\n Desired dimensionality of output data.\n Must be strictly less than the number of features.\n The default value is useful for visualisation. For LSA, a value of\n 100 is recommended.\n\nalgorithm : {'arpack', 'randomized'}, default='randomized'\n SVD solver to use. 
Either \"arpack\" for the ARPACK wrapper in SciPy\n (scipy.sparse.linalg.svds), or \"randomized\" for the randomized\n algorithm due to Halko (2009).\n\nn_iter : int, default=5\n Number of iterations for randomized SVD solver. Not used by ARPACK. The\n default is larger than the default in\n :func:`~sklearn.utils.extmath.randomized_svd` to handle sparse\n matrices that may have large slowly decaying spectrum.\n\nrandom_state : int, RandomState instance or None, default=None\n Used during randomized svd. Pass an int for reproducible results across\n multiple function calls.\n See :term:`Glossary `.\n\ntol : float, default=0.\n Tolerance for ARPACK. 0 means machine precision. Ignored by randomized\n SVD solver.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n\nexplained_variance_ : ndarray of shape (n_components,)\n The variance of the training samples transformed by a projection to\n each component.\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n\nsingular_values_ : ndarray of shape (n_components,)\n The singular values corresponding to each of the selected components.\n The singular values are equal to the 2-norms of the ``n_components``\n variables in the lower-dimensional space.\n\nExamples\n--------\n>>> from sklearn.decomposition import TruncatedSVD\n>>> from scipy.sparse import random as sparse_random\n>>> X = sparse_random(100, 100, density=0.01, format='csr',\n... random_state=42)\n>>> svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)\n>>> svd.fit(X)\nTruncatedSVD(n_components=5, n_iter=7, random_state=42)\n>>> print(svd.explained_variance_ratio_)\n[0.0646... 0.0633... 0.0639... 0.0535... 0.0406...]\n>>> print(svd.explained_variance_ratio_.sum())\n0.286...\n>>> print(svd.singular_values_)\n[1.553... 1.512... 1.510... 1.370... 
1.199...]\n\nSee Also\n--------\nPCA\n\nReferences\n----------\nFinding structure with randomness: Stochastic algorithms for constructing\napproximate matrix decompositions\nHalko, et al., 2009 (arXiv:909) https://arxiv.org/pdf/0909.4061.pdf\n\nNotes\n-----\nSVD suffers from a problem called \"sign indeterminacy\", which means the\nsign of the ``components_`` and the output from transform depend on the\nalgorithm and random state. To work around this, fit instances of this\nclass to data once, then keep the instance around to do transformations." - } - ], - "functions": [] - }, - { - "name": "sklearn.decomposition", - "imports": [ - "from _nmf import NMF", - "from _nmf import non_negative_factorization", - "from _pca import PCA", - "from _incremental_pca import IncrementalPCA", - "from _kernel_pca import KernelPCA", - "from _sparse_pca import SparsePCA", - "from _sparse_pca import MiniBatchSparsePCA", - "from _truncated_svd import TruncatedSVD", - "from _fastica import FastICA", - "from _fastica import fastica", - "from _dict_learning import dict_learning", - "from _dict_learning import dict_learning_online", - "from _dict_learning import sparse_encode", - "from _dict_learning import DictionaryLearning", - "from _dict_learning import MiniBatchDictionaryLearning", - "from _dict_learning import SparseCoder", - "from _factor_analysis import FactorAnalysis", - "from utils.extmath import randomized_svd", - "from _lda import LatentDirichletAllocation" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.decomposition.tests.test_dict_learning", - "imports": [ - "import pytest", - "import numpy as np", - "from functools import partial", - "import itertools", - "from sklearn.base import clone", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.utils import check_array", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import 
ignore_warnings", - "from sklearn.utils._testing import TempMemmap", - "from sklearn.decomposition import DictionaryLearning", - "from sklearn.decomposition import MiniBatchDictionaryLearning", - "from sklearn.decomposition import SparseCoder", - "from sklearn.decomposition import dict_learning", - "from sklearn.decomposition import dict_learning_online", - "from sklearn.decomposition import sparse_encode", - "from sklearn.utils.estimator_checks import check_transformer_data_not_an_array", - "from sklearn.utils.estimator_checks import check_transformer_general", - "from sklearn.utils.estimator_checks import check_transformers_unfitted", - "from io import StringIO", - "import sys" - ], - "classes": [], - "functions": [ - { - "name": "test_sparse_encode_shapes_omp", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_overcomplete", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_lars_positive_parameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_positivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_lars_dict_positivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_lars_code_positivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "test_dict_learning_reconstruction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_reconstruction_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_lassocd_readonly_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_nonzero_coefs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_unknown_fit_algorithm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_lars_positive_parameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_dictionary_learning_positivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minibatch_dictionary_learning_lars", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_positivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_verbosity", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_estimator_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_overcomplete", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_initialization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_readonly_initialization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_online_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dict_learning_iter_offset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_encode_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_encode_positivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_encode_unavailable_positivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_encode_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_encode_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_encode_error_default_sparsity", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unknown_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_coder_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_coder_estimator_clone", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_coder_parallel_mmap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_coder_common_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_coder_deprecation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_coder_n_features_in", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_factor_analysis", - "imports": [ - "from itertools import combinations", - "import numpy as np", - "import pytest", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.decomposition import FactorAnalysis", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.decomposition._factor_analysis import _ortho_rotation" - ], - "classes": [], - "functions": [ - { - "name": "test_factor_analysis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_fastica", - "imports": [ - "import itertools", - "import warnings", - "import pytest", - "import numpy as np", - "from scipy import stats", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.decomposition import FastICA", - "from sklearn.decomposition import fastica", - "from sklearn.decomposition import PCA", - "from sklearn.decomposition._fastica import _gs_decorrelation", - "from sklearn.exceptions import ConvergenceWarning" - ], - "classes": [], - "functions": [ - { - "name": "center_and_norm", - "decorators": [], - "parameters": [ - { - "name": "x: ndarray", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array with an axis of observations (statistical units) measured on random variables." - }, - { - "name": "axis: int", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axis along which the mean and variance are calculated." - }, - { - "name": "optional", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axis along which the mean and variance are calculated." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Centers and norms x **in place**\n\nParameters\n-----------\nx: ndarray\n Array with an axis of observations (statistical units) measured on\n random variables.\naxis: int, optional\n Axis along which the mean and variance are calculated." 
- }, - { - "name": "test_gs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fastica_simple", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fastica_nowhiten", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fastica_convergence_fail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_square_fastica", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fastica_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fastica_output_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_incremental_pca", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn import datasets", - "from sklearn.decomposition import PCA", - "from sklearn.decomposition import IncrementalPCA", - "from scipy import sparse" - ], - "classes": [], - "functions": [ - { - "name": "test_incremental_pca", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_check_projection", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_num_features_change", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_batch_signs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_batch_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_batch_rank", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_against_pca_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_incremental_pca_against_pca_random_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_explained_variances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_singular_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_whitening", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_partial_fit_float_division", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_pca_fit_overflow_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_kernel_pca", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "import pytest", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.decomposition import PCA", - "from sklearn.decomposition import KernelPCA", - "from sklearn.datasets import make_circles", - "from sklearn.datasets import make_blobs", - "from sklearn.linear_model import Perceptron", - "from sklearn.pipeline import Pipeline", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.metrics.pairwise import rbf_kernel", - "from sklearn.utils.validation import _check_psd_eigenvalues" - ], - "classes": [], - "functions": [ - { - "name": "test_kernel_pca", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_kernel_pca_invalid_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_pca_consistent_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_pca_deterministic_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_pca_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_pca_linear_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_pca_n_components", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_remove_zero_eig", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_leave_zero_eig", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "This test checks that fit().transform() returns the same result as\nfit_transform() in case of non-removed zero eigenvalue.\nNon-regression test for issue #12141 (PR #12143)" - }, - { - "name": "test_kernel_pca_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_pca_invalid_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearch_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearch_pipeline_precomputed", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nested_circles", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_conditioning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that ``_check_psd_eigenvalues`` is correctly called\nNon-regression test for issue #12140 (PR #12145)" - }, - { - "name": "test_kernel_pca_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_32_64_decomposition_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that the decomposition is similar for 32 and 64 bits data " - }, - { - "name": "test_kernel_pcc_pairwise_is_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_nmf", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "from scipy import linalg", - "from sklearn.decomposition import NMF", - "from sklearn.decomposition import non_negative_factorization", - "from sklearn.decomposition import _nmf as nmf", - "from scipy.sparse import csc_matrix", - "import pytest", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils.extmath import squared_norm", - "from sklearn.base import clone", - "from sklearn.exceptions import ConvergenceWarning" - ], - "classes": [], - "functions": [ - { - "name": 
"test_convergence_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_initialize_nn_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parameter_checking", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_initialize_close", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_initialize_variants", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_fit_nn_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_fit_close", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_transform_custom_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components_greater_n_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_sparse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_non_negative_factorization_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_negative_factorization_checking", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_beta_divergence_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the beta-divergence of X and W.H for dense array only.\n\nUsed as a reference for testing nmf._beta_divergence." - }, - { - "name": "test_beta_divergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_special_sparse_dot", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_multiplicative_update_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_negative_beta_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_regularization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_decreasing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_underflow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_dtype_match", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_float32_float64_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nmf_custom_init_dtype_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_default_deprecation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_online_lda", - "imports": [ - "import sys", - "import numpy as np", - "from scipy.linalg import block_diag", - "from scipy.sparse import csr_matrix", - "from scipy.special import psi", - "import pytest", - "from sklearn.decomposition import LatentDirichletAllocation", - "from sklearn.decomposition._lda import _dirichlet_expectation_1d", - "from sklearn.decomposition._lda import _dirichlet_expectation_2d", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import if_safe_multiprocessing_with_blas", - "from sklearn.exceptions import NotFittedError", - "from io import StringIO" - ], - "classes": [], - "functions": [ - { - "name": "_build_sparse_mtx", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_default_prior_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_fit_batch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_fit_online", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_lda_dense_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_negative_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_no_component_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_multi_jobs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_partial_fit_multi_jobs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_preplexity_mismatch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_perplexity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_perplexity_input_format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_score_perplexity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { 
- "name": "test_lda_fit_perplexity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_empty_docs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test LDA on empty document (all-zero rows)." - }, - { - "name": "test_dirichlet_expectation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test Cython version of Dirichlet expectation calculation." - }, - { - "name": "check_verbosity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_verbosity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_pca", - "imports": [ - "import numpy as np", - "import scipy as sp", - "import pytest", - "from sklearn.utils._testing import assert_allclose", - "from sklearn import datasets", - "from sklearn.decomposition import PCA", - "from sklearn.datasets import load_iris", - "from sklearn.decomposition._pca import _assess_dimension", - "from sklearn.decomposition._pca import _infer_dimension" - ], - "classes": [], - "functions": [ - { - "name": "test_pca", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_empty_slice_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_whitening", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_explained_variance_equivalence_solver", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_pca_explained_variance_empirical", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_singular_values_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_singular_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_check_projection", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_check_projection_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components_mle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components_mle_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_dim", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_infer_dim_1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_infer_dim_2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - 
}, - { - "name": "test_infer_dim_3", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_infer_dim_by_explained_variance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_score3", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_sanity_noise_variance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_score_consistency_solvers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_zero_noise_variance_edge_cases", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_svd_solver_auto", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_bad_solver", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_deterministic_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_dtype_preservation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pca_float_dtype_preservation", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pca_int_dtype_upcast_to_double", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_n_components_mostly_explained_variance_ratio", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_assess_dimension_bad_rank", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_small_eigenvalues_mle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mle_redundant_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_mle_too_few_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mle_simple_case", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_assess_dimesion_rank_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_sparse_pca", - "imports": [ - "import sys", - "import pytest", - "import numpy as np", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import if_safe_multiprocessing_with_blas", - "from sklearn.decomposition import SparsePCA", - "from sklearn.decomposition import MiniBatchSparsePCA", - "from sklearn.decomposition import PCA", - "from sklearn.utils import check_random_state", - "import joblib" - ], - "classes": [], - "functions": [ - { - "name": 
"generate_toy_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_correct_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_transform_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_transform_tall", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_initialization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mini_batch_correct_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mini_batch_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaling_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_vs_spca", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spca_n_components_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests.test_truncated_svd", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "import pytest", - 
"from sklearn.decomposition import TruncatedSVD", - "from sklearn.decomposition import PCA", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_array_less", - "from sklearn.utils._testing import assert_allclose" - ], - "classes": [], - "functions": [ - { - "name": "X_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_solvers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_too_many_components", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_formats", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_integers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_explained_variance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_explained_variance_components_10_20", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_singular_values_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_singular_values_expected", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_truncated_svd_eq_pca", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.decomposition.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.ensemble.setup", - "imports": [ - "import numpy", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._bagging", - "imports": [ - "import itertools", - "import numbers", - "import numpy as np", - "from abc import ABCMeta", - "from abc import abstractmethod", - "from warnings import warn", - "from joblib import Parallel", - "from _base import BaseEnsemble", - "from _base import _partition_estimators", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from metrics import r2_score", - "from metrics import accuracy_score", - "from tree import DecisionTreeClassifier", - "from tree import DecisionTreeRegressor", - "from utils import check_random_state", - "from utils import check_array", - "from utils import column_or_1d", - "from utils import indices_to_mask", - "from utils.metaestimators import if_delegate_has_method", - "from utils.multiclass import check_classification_targets", - "from utils.random import sample_without_replacement", - "from utils.validation import has_fit_parameter", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed" - ], - "classes": [ - { - "name": "BaseBagging", - "decorators": [], - 
"superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrices are accepted only if they are supported by the base estimator." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels in classification, real numbers in regression)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Note that this is supported only if the base estimator supports sample weighting." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a Bagging ensemble of estimators from the training\n set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\ny : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, then samples are equally weighted.\n Note that this is supported only if the base estimator supports\n sample weighting.\n\nReturns\n-------\nself : object" - }, - { - "name": "_parallel_args", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrices are accepted only if they are supported by the base estimator." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels in classification, real numbers in regression)." - }, - { - "name": "max_samples", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to use instead of self.max_samples." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Override value used when constructing base estimator. Only supported if the base estimator has a max_depth parameter." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Note that this is supported only if the base estimator supports sample weighting." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a Bagging ensemble of estimators from the training\n set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. 
Sparse matrices are accepted only if\n they are supported by the base estimator.\n\ny : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\nmax_samples : int or float, default=None\n Argument to use instead of self.max_samples.\n\nmax_depth : int, default=None\n Override value used when constructing base estimator. Only\n supported if the base estimator has a max_depth parameter.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if the base estimator supports\n sample weighting.\n\nReturns\n-------\nself : object" - }, - { - "name": "_set_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate out of bag predictions and score." - }, - { - "name": "_validate_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_estimators_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "estimators_samples_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The subset of drawn samples for each base estimator.\n\nReturns a dynamically generated list of indices identifying\nthe samples used for fitting each member of the ensemble, i.e.,\nthe in-bag samples.\n\nNote: the list is re-created at each call to the property in order\nto reduce the object memory footprint by not storing the sampling\ndata. Thus fetching the property may be slower than expected." - } - ], - "docstring": "Base class for Bagging meta-estimator.\n\nWarning: This class should not be used directly. Use derived classes\ninstead." 
- }, - { - "name": "BaggingClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base estimator to fit on random subsets of the dataset. If None, then the base estimator is a :class:`~sklearn.tree.DecisionTreeClassifier`." - }, - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The number of base estimators in the ensemble." - }, - { - "name": "max_samples", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of samples to draw from X to train each base estimator (with replacement by default, see `bootstrap` for more details). - If int, then draw `max_samples` samples. - If float, then draw `max_samples * X.shape[0]` samples." - }, - { - "name": "max_features", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of features to draw from X to train each base estimator ( without replacement by default, see `bootstrap_features` for more details). - If int, then draw `max_features` features. - If float, then draw `max_features * X.shape[1]` features." - }, - { - "name": "bootstrap", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether samples are drawn with replacement. If False, sampling without replacement is performed." - }, - { - "name": "bootstrap_features", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether features are drawn with replacement." 
- }, - { - "name": "oob_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use out-of-bag samples to estimate the generalization error." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new ensemble. See :term:`the Glossary `. .. versionadded:: 0.17 *warm_start* constructor parameter." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel for both :meth:`fit` and :meth:`predict`. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the random resampling of the original dataset (sample wise and feature wise). If the base estimator accepts a `random_state` attribute, a different seed is generated for each instance in the ensemble. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity when fitting and predicting." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the estimator and set the base_estimator_ attribute." 
- }, - { - "name": "_set_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrices are accepted only if they are supported by the base estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class for X.\n\nThe predicted class of an input sample is computed as the class with\nthe highest mean predicted probability. If base estimators do not\nimplement a ``predict_proba`` method, then it resorts to voting.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted classes." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrices are accepted only if they are supported by the base estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample is computed as\nthe mean predicted class probabilities of the base estimators in the\nensemble. 
If base estimators do not implement a ``predict_proba``\nmethod, then it resorts to voting and the predicted class probabilities\nof an input sample represents the proportion of estimators predicting\neach class.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrices are accepted only if they are supported by the base estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class log-probabilities for X.\n\nThe predicted class log-probabilities of an input sample is computed as\nthe log of the mean predicted class probabilities of the base\nestimators in the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class log-probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrices are accepted only if they are supported by the base estimator." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Average of the decision functions of the base classifiers.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\nReturns\n-------\nscore : ndarray of shape (n_samples, k)\n The decision function of the input samples. The columns correspond\n to the classes in sorted order, as they appear in the attribute\n ``classes_``. Regression and binary classification are special\n cases with ``k == 1``, otherwise ``k==n_classes``." - } - ], - "docstring": "A Bagging classifier.\n\nA Bagging classifier is an ensemble meta-estimator that fits base\nclassifiers each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.15\n\nParameters\n----------\nbase_estimator : object, default=None\n The base estimator to fit on random subsets of the dataset.\n If None, then the base estimator is a\n :class:`~sklearn.tree.DecisionTreeClassifier`.\n\nn_estimators : int, default=10\n The number of base estimators in the ensemble.\n\nmax_samples : int or float, default=1.0\n The number of samples to draw from X to train each base estimator (with\n replacement by default, see `bootstrap` for more details).\n\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples.\n\nmax_features : int or float, default=1.0\n The number of features to draw from X to train each base estimator (\n without replacement by default, see `bootstrap_features` for more\n details).\n\n - If int, then draw `max_features` features.\n - If float, then draw `max_features * X.shape[1]` features.\n\nbootstrap : bool, default=True\n Whether samples are drawn with replacement. If False, sampling\n without replacement is performed.\n\nbootstrap_features : bool, default=False\n Whether features are drawn with replacement.\n\noob_score : bool, default=False\n Whether to use out-of-bag samples to estimate\n the generalization error.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit\n a whole new ensemble. See :term:`the Glossary `.\n\n .. versionadded:: 0.17\n *warm_start* constructor parameter.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for both :meth:`fit` and\n :meth:`predict`. ``None`` means 1 unless in a\n :obj:`joblib.parallel_backend` context. ``-1`` means using all\n processors. 
See :term:`Glossary ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random resampling of the original dataset\n (sample wise and feature wise).\n If the base estimator accepts a `random_state` attribute, a different\n seed is generated for each instance in the ensemble.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nAttributes\n----------\nbase_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\nn_features_ : int\n The number of features when :meth:`fit` is performed.\n\nestimators_ : list of estimators\n The collection of fitted base estimators.\n\nestimators_samples_ : list of arrays\n The subset of drawn samples (i.e., the in-bag samples) for each base\n estimator. Each subset is defined by an array of the indices selected.\n\nestimators_features_ : list of arrays\n The subset of drawn features for each base estimator.\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\nn_classes_ : int or list\n The number of classes.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_decision_function_ : ndarray of shape (n_samples, n_classes)\n Decision function computed with out-of-bag estimate on the training\n set. If n_estimators is small it might be possible that a data point\n was never left out during the bootstrap. In this case,\n `oob_decision_function_` might contain NaN. This attribute exists\n only when ``oob_score`` is True.\n\nExamples\n--------\n>>> from sklearn.svm import SVC\n>>> from sklearn.ensemble import BaggingClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=100, n_features=4,\n... n_informative=2, n_redundant=0,\n... 
random_state=0, shuffle=False)\n>>> clf = BaggingClassifier(base_estimator=SVC(),\n... n_estimators=10, random_state=0).fit(X, y)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])\n\nReferences\n----------\n\n.. [1] L. Breiman, \"Pasting small votes for classification in large\n databases and on-line\", Machine Learning, 36(1), 85-103, 1999.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning, 24(2), 123-140,\n 1996.\n\n.. [3] T. Ho, \"The random subspace method for constructing decision\n forests\", Pattern Analysis and Machine Intelligence, 20(8), 832-844,\n 1998.\n\n.. [4] G. Louppe and P. Geurts, \"Ensembles on Random Patches\", Machine\n Learning and Knowledge Discovery in Databases, 346-361, 2012." - }, - { - "name": "BaggingRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base estimator to fit on random subsets of the dataset. If None, then the base estimator is a :class:`~sklearn.tree.DecisionTreeRegressor`." - }, - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The number of base estimators in the ensemble." - }, - { - "name": "max_samples", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of samples to draw from X to train each base estimator (with replacement by default, see `bootstrap` for more details). - If int, then draw `max_samples` samples. - If float, then draw `max_samples * X.shape[0]` samples." 
- }, - { - "name": "max_features", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of features to draw from X to train each base estimator ( without replacement by default, see `bootstrap_features` for more details). - If int, then draw `max_features` features. - If float, then draw `max_features * X.shape[1]` features." - }, - { - "name": "bootstrap", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether samples are drawn with replacement. If False, sampling without replacement is performed." - }, - { - "name": "bootstrap_features", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether features are drawn with replacement." - }, - { - "name": "oob_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use out-of-bag samples to estimate the generalization error." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new ensemble. See :term:`the Glossary `." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel for both :meth:`fit` and :meth:`predict`. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the random resampling of the original dataset (sample wise and feature wise). If the base estimator accepts a `random_state` attribute, a different seed is generated for each instance in the ensemble. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity when fitting and predicting." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrices are accepted only if they are supported by the base estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict regression target for X.\n\nThe predicted regression target of an input sample is computed as the\nmean predicted regression targets of the estimators in the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted values." - }, - { - "name": "_validate_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the estimator and set the base_estimator_ attribute." 
- }, - { - "name": "_set_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A Bagging regressor.\n\nA Bagging regressor is an ensemble meta-estimator that fits base\nregressors each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.15\n\nParameters\n----------\nbase_estimator : object, default=None\n The base estimator to fit on random subsets of the dataset.\n If None, then the base estimator is a\n :class:`~sklearn.tree.DecisionTreeRegressor`.\n\nn_estimators : int, default=10\n The number of base estimators in the ensemble.\n\nmax_samples : int or float, default=1.0\n The number of samples to draw from X to train each base estimator (with\n replacement by default, see `bootstrap` for more details).\n\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples.\n\nmax_features : int or float, default=1.0\n The number of features to draw from X to train each base estimator (\n without replacement by default, see `bootstrap_features` for more\n details).\n\n - If int, then draw `max_features` features.\n - If float, then draw `max_features * X.shape[1]` features.\n\nbootstrap : bool, default=True\n Whether samples are drawn with replacement. If False, sampling\n without replacement is performed.\n\nbootstrap_features : bool, default=False\n Whether features are drawn with replacement.\n\noob_score : bool, default=False\n Whether to use out-of-bag samples to estimate\n the generalization error.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit\n a whole new ensemble. See :term:`the Glossary `.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for both :meth:`fit` and\n :meth:`predict`. ``None`` means 1 unless in a\n :obj:`joblib.parallel_backend` context. ``-1`` means using all\n processors. 
See :term:`Glossary ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random resampling of the original dataset\n (sample wise and feature wise).\n If the base estimator accepts a `random_state` attribute, a different\n seed is generated for each instance in the ensemble.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nAttributes\n----------\nbase_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\nn_features_ : int\n The number of features when :meth:`fit` is performed.\n\nestimators_ : list of estimators\n The collection of fitted sub-estimators.\n\nestimators_samples_ : list of arrays\n The subset of drawn samples (i.e., the in-bag samples) for each base\n estimator. Each subset is defined by an array of the indices selected.\n\nestimators_features_ : list of arrays\n The subset of drawn features for each base estimator.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_prediction_ : ndarray of shape (n_samples,)\n Prediction computed with out-of-bag estimate on the training\n set. If n_estimators is small it might be possible that a data point\n was never left out during the bootstrap. In this case,\n `oob_prediction_` might contain NaN. This attribute exists only\n when ``oob_score`` is True.\n\nExamples\n--------\n>>> from sklearn.svm import SVR\n>>> from sklearn.ensemble import BaggingRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_samples=100, n_features=4,\n... n_informative=2, n_targets=1,\n... random_state=0, shuffle=False)\n>>> regr = BaggingRegressor(base_estimator=SVR(),\n... 
n_estimators=10, random_state=0).fit(X, y)\n>>> regr.predict([[0, 0, 0, 0]])\narray([-2.8720...])\n\nReferences\n----------\n\n.. [1] L. Breiman, \"Pasting small votes for classification in large\n databases and on-line\", Machine Learning, 36(1), 85-103, 1999.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning, 24(2), 123-140,\n 1996.\n\n.. [3] T. Ho, \"The random subspace method for constructing decision\n forests\", Pattern Analysis and Machine Intelligence, 20(8), 832-844,\n 1998.\n\n.. [4] G. Louppe and P. Geurts, \"Ensembles on Random Patches\", Machine\n Learning and Knowledge Discovery in Databases, 346-361, 2012." - } - ], - "functions": [ - { - "name": "_generate_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Draw randomly sampled indices." - }, - { - "name": "_generate_bagging_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Randomly draw feature and sample indices." - }, - { - "name": "_parallel_build_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to build a batch of estimators within a job." - }, - { - "name": "_parallel_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to compute (proba-)predictions within a job." - }, - { - "name": "_parallel_predict_log_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to compute log probabilities within a job." - }, - { - "name": "_parallel_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to compute decisions within a job." 
- }, - { - "name": "_parallel_predict_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to compute predictions within a job." - } - ] - }, - { - "name": "sklearn.ensemble._base", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numbers", - "from typing import List", - "import numpy as np", - "from joblib import effective_n_jobs", - "from base import clone", - "from base import is_classifier", - "from base import is_regressor", - "from base import BaseEstimator", - "from base import MetaEstimatorMixin", - "from utils import Bunch", - "from utils import _print_elapsed_time", - "from utils import check_random_state", - "from utils.metaestimators import _BaseComposition" - ], - "classes": [ - { - "name": "BaseEnsemble", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base estimator from which the ensemble is built." - }, - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The number of estimators in the ensemble." - }, - { - "name": "estimator_params", - "type": "List[str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The list of attributes to use as parameters when instantiating a new base estimator. If none are given, default parameters are used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the estimator and the n_estimator attribute.\n\nSets the base_estimator_` attributes." 
- }, - { - "name": "_make_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make and configure a copy of the `base_estimator_` attribute.\n\nWarning: This method should be used to properly instantiate new\nsub-estimators." - }, - { - "name": "__len__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the number of estimators in the ensemble." - }, - { - "name": "__getitem__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the index'th estimator in the ensemble." - }, - { - "name": "__iter__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return iterator over estimators in the ensemble." - } - ], - "docstring": "Base class for all ensemble classes.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.\n\nParameters\n----------\nbase_estimator : object\n The base estimator from which the ensemble is built.\n\nn_estimators : int, default=10\n The number of estimators in the ensemble.\n\nestimator_params : list of str, default=tuple()\n The list of attributes to use as parameters when instantiating a\n new base estimator. If none are given, default parameters are used.\n\nAttributes\n----------\nbase_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\nestimators_ : list of estimators\n The collection of fitted base estimators." - }, - { - "name": "_BaseHeterogeneousEnsemble", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimators", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ensemble of estimators to use in the ensemble. Each element of the list is defined as a tuple of string (i.e. 
name of the estimator) and an estimator instance. An estimator can be set to `'drop'` using `set_params`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "named_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [ - { - "name": "**params", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specific parameters using e.g. `set_params(parameter_name=new_value)`. In addition, to setting the parameters of the estimator, the individual estimator of the estimators can also be set, or can be removed by setting them to 'drop'." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set the parameters of an estimator from the ensemble.\n\nValid parameter keys can be listed with `get_params()`. Note that you\ncan directly set the parameters of the estimators contained in\n`estimators`.\n\nParameters\n----------\n**params : keyword arguments\n Specific parameters using e.g.\n `set_params(parameter_name=new_value)`. In addition, to setting the\n parameters of the estimator, the individual estimator of the\n estimators can also be set, or can be removed by setting them to\n 'drop'." - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Setting it to True gets the various estimators and the parameters of the estimators as well." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get the parameters of an estimator from the ensemble.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `estimators` parameter.\n\nParameters\n----------\ndeep : bool, default=True\n Setting it to True gets the various estimators and the parameters\n of the estimators as well." - } - ], - "docstring": "Base class for heterogeneous ensemble of learners.\n\nParameters\n----------\nestimators : list of (str, estimator) tuples\n The ensemble of estimators to use in the ensemble. Each element of the\n list is defined as a tuple of string (i.e. name of the estimator) and\n an estimator instance. An estimator can be set to `'drop'` using\n `set_params`.\n\nAttributes\n----------\nestimators_ : list of estimators\n The elements of the estimators parameter, having been fitted on the\n training data. If an estimator has been set to `'drop'`, it will not\n appear in `estimators_`." - } - ], - "functions": [ - { - "name": "_fit_single_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to fit an estimator within a job." - }, - { - "name": "_set_random_states", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator with potential randomness managed by random_state parameters." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the generation of the random integers. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set fixed random_state parameters for an estimator.\n\nFinds all parameters ending ``random_state`` and sets them to integers\nderived from ``random_state``.\n\nParameters\n----------\nestimator : estimator supporting get/set_params\n Estimator with potential randomness managed by random_state\n parameters.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the generation of the random\n integers. Pass an int for reproducible output across multiple function\n calls.\n See :term:`Glossary `.\n\nNotes\n-----\nThis does not necessarily set *all* ``random_state`` attributes that\ncontrol an estimator's randomness, only those accessible through\n``estimator.get_params()``. ``random_state``s not controlled include\nthose belonging to:\n\n * cross-validation splitters\n * ``scipy.stats`` rvs" - }, - { - "name": "_partition_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to partition estimators between jobs." 
- } - ] - }, - { - "name": "sklearn.ensemble._forest", - "imports": [ - "import numbers", - "from warnings import catch_warnings", - "from warnings import simplefilter", - "from warnings import warn", - "import threading", - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numpy as np", - "from scipy.sparse import issparse", - "from scipy.sparse import hstack as sparse_hstack", - "from joblib import Parallel", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import MultiOutputMixin", - "from metrics import r2_score", - "from preprocessing import OneHotEncoder", - "from tree import DecisionTreeClassifier", - "from tree import DecisionTreeRegressor", - "from tree import ExtraTreeClassifier", - "from tree import ExtraTreeRegressor", - "from tree._tree import DTYPE", - "from tree._tree import DOUBLE", - "from utils import check_random_state", - "from utils import check_array", - "from utils import compute_sample_weight", - "from exceptions import DataConversionWarning", - "from _base import BaseEnsemble", - "from _base import _partition_estimators", - "from utils.fixes import delayed", - "from utils.fixes import _joblib_parallel_args", - "from utils.multiclass import check_classification_targets", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "BaseForest", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "apply", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, its dtype will be converted to ``dtype=np.float32``. 
If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply trees in the forest to X, return leaf indices.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\nX_leaves : ndarray of shape (n_samples, n_estimators)\n For each datapoint x in X and for each tree in the forest,\n return the index of the leaf x ends up in." - }, - { - "name": "decision_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, its dtype will be converted to ``dtype=np.float32``. If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the decision path in the forest.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\nindicator : sparse matrix of shape (n_samples, n_nodes)\n Return a node indicator matrix where non zero elements indicates\n that the samples goes through the nodes. The matrix is of CSR\n format.\n\nn_nodes_ptr : ndarray of shape (n_estimators + 1,)\n The columns from indicator[n_nodes_ptr[i]:n_nodes_ptr[i+1]]\n gives the indicator value for the i-th estimator." 
- }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Internally, its dtype will be converted to ``dtype=np.float32``. If a sparse matrix is provided, it will be converted into a sparse ``csc_matrix``." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels in classification, real numbers in regression)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Splits that would create child nodes with net zero or negative weight are ignored while searching for a split in each node. In the case of classification, splits are also ignored if they would result in any single class carrying a negative weight in either child node." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a forest of trees from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Internally, its dtype will be converted\n to ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csc_matrix``.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels in classification, real numbers in\n regression).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. 
In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\nReturns\n-------\nself : object" - }, - { - "name": "_set_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate out of bag predictions and score." - }, - { - "name": "_validate_y_class_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_X_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate X whenever one tries to predict, apply, predict_proba." - }, - { - "name": "feature_importances_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The impurity-based feature importances.\n\nThe higher, the more important the feature.\nThe importance of a feature is computed as the (normalized)\ntotal reduction of the criterion brought by that feature. It is also\nknown as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). See\n:func:`sklearn.inspection.permutation_importance` as an alternative.\n\nReturns\n-------\nfeature_importances_ : ndarray of shape (n_features,)\n The values of this array sum to 1, unless all trees are single node\n trees consisting of only the root node, in which case it will be an\n array of zeros." - } - ], - "docstring": "Base class for forests of trees.\n\nWarning: This class should not be used directly. Use derived classes\ninstead." 
- }, - { - "name": "ForestClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute out-of-bag score." - }, - { - "name": "_validate_y_class_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, its dtype will be converted to ``dtype=np.float32``. If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class for X.\n\nThe predicted class of an input sample is a vote by the trees in\nthe forest, weighted by their probability estimates. That is,\nthe predicted class is the one with highest mean probability\nestimate across the trees.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\ny : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The predicted classes." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, its dtype will be converted to ``dtype=np.float32``. 
If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample are computed as\nthe mean predicted class probabilities of the trees in the forest.\nThe class probability of a single tree is the fraction of samples of\nthe same class in a leaf.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes), or a list of n_outputs\n such arrays if n_outputs > 1.\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, its dtype will be converted to ``dtype=np.float32``. If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class log-probabilities for X.\n\nThe predicted class log-probabilities of an input sample is computed as\nthe log of the mean predicted class probabilities of the trees in the\nforest.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. 
If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes), or a list of n_outputs\n such arrays if n_outputs > 1.\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - } - ], - "docstring": "Base class for forest of trees-based classifiers.\n\nWarning: This class should not be used directly. Use derived classes\ninstead." - }, - { - "name": "ForestRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, its dtype will be converted to ``dtype=np.float32``. If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict regression target for X.\n\nThe predicted regression target of an input sample is computed as the\nmean predicted regression targets of the trees in the forest.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\ny : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The predicted values." - }, - { - "name": "_set_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute out-of-bag scores." 
- }, - { - "name": "_compute_partial_dependence_recursion", - "decorators": [], - "parameters": [ - { - "name": "grid", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The grid points on which the partial dependence should be evaluated." - }, - { - "name": "target_features", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of target features for which the partial dependence should be evaluated." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fast partial dependence computation.\n\nParameters\n----------\ngrid : ndarray of shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\ntarget_features : ndarray of shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\n\nReturns\n-------\naveraged_predictions : ndarray of shape (n_samples,)\n The value of the partial dependence function on each grid point." - } - ], - "docstring": "Base class for forest of trees-based regressors.\n\nWarning: This class should not be used directly. Use derived classes\ninstead." - }, - { - "name": "RandomForestClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of trees in the forest. .. versionchanged:: 0.22 The default value of ``n_estimators`` changed from 10 to 100 in 0.22." - }, - { - "name": "criterion", - "type": "Literal[\"gini\", \"entropy\"]", - "hasDefault": true, - "default": "\"gini\"", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. 
Supported criteria are \"gini\" for the Gini impurity and \"entropy\" for the information gain. Note: this parameter is tree-specific." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. 
Samples have equal weight when sample_weight is not provided." - }, - { - "name": "max_features", - "type": "Literal[\"auto\", \"sqrt\", \"log2\"]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `round(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=sqrt(n_features)`. - If \"sqrt\", then `max_features=sqrt(n_features)` (same as \"auto\"). - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. 
versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "bootstrap", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree." - }, - { - "name": "oob_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use out-of-bag samples to estimate the generalization accuracy." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`, :meth:`decision_path` and :meth:`apply` are all parallelized over the trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls both the randomness of the bootstrapping of the samples used when building trees (if ``bootstrap=True``) and the sampling of the features to consider when looking for the best split at each node (if ``max_features < n_features``). See :term:`Glossary ` for details." 
- }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity when fitting and predicting." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`the Glossary `." - }, - { - "name": "class_weight", - "type": "Literal[\"balanced\", \"balanced_subsample\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. Note that for multioutput (including multilabel) weights should be defined for each class of every column in its own dict. For example, for four-class multilabel classification weights should be [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of [{1:1}, {2:5}, {3:1}, {4:1}]. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` The \"balanced_subsample\" mode is the same as \"balanced\" except that weights are computed based on the bootstrap sample for every tree grown. For multi-output, the weights of each column of y will be multiplied. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified." - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. 
The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. versionadded:: 0.22" - }, - { - "name": "max_samples", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If bootstrap is True, the number of samples to draw from X to train each base estimator. - If None (default), then draw `X.shape[0]` samples. - If int, then draw `max_samples` samples. - If float, then draw `max_samples * X.shape[0]` samples. Thus, `max_samples` should be in the interval `(0, 1)`. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A random forest classifier.\n\nA random forest is a meta estimator that fits a number of decision tree\nclassifiers on various sub-samples of the dataset and uses averaging to\nimprove the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_estimators : int, default=100\n The number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\ncriterion : {\"gini\", \"entropy\"}, default=\"gini\"\n The function to measure the quality of a split. Supported criteria are\n \"gini\" for the Gini impurity and \"entropy\" for the information gain.\n Note: this parameter is tree-specific.\n\nmax_depth : int, default=None\n The maximum depth of the tree. 
If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. 
Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : {\"auto\", \"sqrt\", \"log2\"}, int or float, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)` (same as \"auto\").\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. 
The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nbootstrap : bool, default=True\n Whether bootstrap samples are used when building trees. If False, the\n whole dataset is used to build each tree.\n\noob_score : bool, default=False\n Whether to use out-of-bag samples to estimate\n the generalization accuracy.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls both the randomness of the bootstrapping of the samples used\n when building trees (if ``bootstrap=True``) and the sampling of the\n features to consider when looking for the best split at each node\n (if ``max_features < n_features``).\n See :term:`Glossary ` for details.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. See :term:`the Glossary `.\n\nclass_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts, default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. 
For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n The \"balanced_subsample\" mode is the same as \"balanced\" except that\n weights are computed based on the bootstrap sample for every tree\n grown.\n\n For multi-output, the weights of each column of y will be multiplied.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n If bootstrap is True, the number of samples to draw from X\n to train each base estimator.\n\n - If None (default), then draw `X.shape[0]` samples.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n .. 
versionadded:: 0.22\n\nAttributes\n----------\nbase_estimator_ : DecisionTreeClassifier\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\nestimators_ : list of DecisionTreeClassifier\n The collection of fitted sub-estimators.\n\nclasses_ : ndarray of shape (n_classes,) or a list of such arrays\n The classes labels (single output problem), or a list of arrays of\n class labels (multi-output problem).\n\nn_classes_ : int or list\n The number of classes (single output problem), or a list containing the\n number of classes for each output (multi-output problem).\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_decision_function_ : ndarray of shape (n_samples, n_classes)\n Decision function computed with out-of-bag estimate on the training\n set. If n_estimators is small it might be possible that a data point\n was never left out during the bootstrap. In this case,\n `oob_decision_function_` might contain NaN. This attribute exists\n only when ``oob_score`` is True.\n\nSee Also\n--------\nDecisionTreeClassifier, ExtraTreesClassifier\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) 
lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data,\n``max_features=n_features`` and ``bootstrap=False``, if the improvement\nof the criterion is identical for several splits enumerated during the\nsearch of the best split. To obtain a deterministic behaviour during\nfitting, ``random_state`` has to be fixed.\n\nReferences\n----------\n.. [1] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n\nExamples\n--------\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=1000, n_features=4,\n... n_informative=2, n_redundant=0,\n... random_state=0, shuffle=False)\n>>> clf = RandomForestClassifier(max_depth=2, random_state=0)\n>>> clf.fit(X, y)\nRandomForestClassifier(...)\n>>> print(clf.predict([[0, 0, 0, 0]]))\n[1]" - }, - { - "name": "RandomForestRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of trees in the forest. .. versionchanged:: 0.22 The default value of ``n_estimators`` changed from 10 to 100 in 0.22." - }, - { - "name": "criterion", - "type": "Literal[\"mse\", \"mae\"]", - "hasDefault": true, - "default": "\"mse\"", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are \"mse\" for the mean squared error, which is equal to variance reduction as feature selection criterion, and \"mae\" for the mean absolute error. .. 
versionadded:: 0.18 Mean Absolute Error (MAE) criterion." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." 
- }, - { - "name": "max_features", - "type": "Literal[\"auto\", \"sqrt\", \"log2\"]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `round(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=n_features`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. 
versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "bootstrap", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree." - }, - { - "name": "oob_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "whether to use out-of-bag samples to estimate the R^2 on unseen data." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`, :meth:`decision_path` and :meth:`apply` are all parallelized over the trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls both the randomness of the bootstrapping of the samples used when building trees (if ``bootstrap=True``) and the sampling of the features to consider when looking for the best split at each node (if ``max_features < n_features``). See :term:`Glossary ` for details." 
- }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity when fitting and predicting." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`the Glossary `." - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. versionadded:: 0.22" - }, - { - "name": "max_samples", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If bootstrap is True, the number of samples to draw from X to train each base estimator. - If None (default), then draw `X.shape[0]` samples. - If int, then draw `max_samples` samples. - If float, then draw `max_samples * X.shape[0]` samples. Thus, `max_samples` should be in the interval `(0, 1)`. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A random forest regressor.\n\nA random forest is a meta estimator that fits a number of classifying\ndecision trees on various sub-samples of the dataset and uses averaging\nto improve the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_estimators : int, default=100\n The number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\ncriterion : {\"mse\", \"mae\"}, default=\"mse\"\n The function to measure the quality of a split. Supported criteria\n are \"mse\" for the mean squared error, which is equal to variance\n reduction as feature selection criterion, and \"mae\" for the mean\n absolute error.\n\n .. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. 
This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : {\"auto\", \"sqrt\", \"log2\"}, int or float, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n 
left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nbootstrap : bool, default=True\n Whether bootstrap samples are used when building trees. If False, the\n whole dataset is used to build each tree.\n\noob_score : bool, default=False\n whether to use out-of-bag samples to estimate\n the R^2 on unseen data.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls both the randomness of the bootstrapping of the samples used\n when building trees (if ``bootstrap=True``) and the sampling of the\n features to consider when looking for the best split at each node\n (if ``max_features < n_features``).\n See :term:`Glossary ` for details.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. 
See :term:`the Glossary `.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n If bootstrap is True, the number of samples to draw from X\n to train each base estimator.\n\n - If None (default), then draw `X.shape[0]` samples.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nbase_estimator_ : DecisionTreeRegressor\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\nestimators_ : list of DecisionTreeRegressor\n The collection of fitted sub-estimators.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). 
See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_prediction_ : ndarray of shape (n_samples,)\n Prediction computed with out-of-bag estimate on the training set.\n This attribute exists only when ``oob_score`` is True.\n\nSee Also\n--------\nDecisionTreeRegressor, ExtraTreesRegressor\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data,\n``max_features=n_features`` and ``bootstrap=False``, if the improvement\nof the criterion is identical for several splits enumerated during the\nsearch of the best split. To obtain a deterministic behaviour during\nfitting, ``random_state`` has to be fixed.\n\nThe default value ``max_features=\"auto\"`` uses ``n_features``\nrather than ``n_features / 3``. The latter was originally suggested in\n[1], whereas the former was more recently justified empirically in [2].\n\nReferences\n----------\n.. [1] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n\n.. [2] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized\n trees\", Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.ensemble import RandomForestRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=4, n_informative=2,\n... 
random_state=0, shuffle=False)\n>>> regr = RandomForestRegressor(max_depth=2, random_state=0)\n>>> regr.fit(X, y)\nRandomForestRegressor(...)\n>>> print(regr.predict([[0, 0, 0, 0]]))\n[-8.32987858]" - }, - { - "name": "ExtraTreesClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of trees in the forest. .. versionchanged:: 0.22 The default value of ``n_estimators`` changed from 10 to 100 in 0.22." - }, - { - "name": "criterion", - "type": "Literal[\"gini\", \"entropy\"]", - "hasDefault": true, - "default": "\"gini\"", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are \"gini\" for the Gini impurity and \"entropy\" for the information gain." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." 
- }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." - }, - { - "name": "max_features", - "type": "Literal[\"auto\", \"sqrt\", \"log2\"]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `round(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=sqrt(n_features)`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." 
- }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "bootstrap", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree." 
- }, - { - "name": "oob_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use out-of-bag samples to estimate the generalization accuracy." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`, :meth:`decision_path` and :meth:`apply` are all parallelized over the trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls 3 sources of randomness: - the bootstrapping of the samples used when building trees (if ``bootstrap=True``) - the sampling of the features to consider when looking for the best split at each node (if ``max_features < n_features``) - the draw of the splits for each of the `max_features` See :term:`Glossary ` for details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity when fitting and predicting." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`the Glossary `." - }, - { - "name": "class_weight", - "type": "Literal[\"balanced\", \"balanced_subsample\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. 
If not given, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. Note that for multioutput (including multilabel) weights should be defined for each class of every column in its own dict. For example, for four-class multilabel classification weights should be [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of [{1:1}, {2:5}, {3:1}, {4:1}]. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` The \"balanced_subsample\" mode is the same as \"balanced\" except that weights are computed based on the bootstrap sample for every tree grown. For multi-output, the weights of each column of y will be multiplied. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified." - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. versionadded:: 0.22" - }, - { - "name": "max_samples", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If bootstrap is True, the number of samples to draw from X to train each base estimator. - If None (default), then draw `X.shape[0]` samples. - If int, then draw `max_samples` samples. - If float, then draw `max_samples * X.shape[0]` samples. Thus, `max_samples` should be in the interval `(0, 1)`. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "An extra-trees classifier.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_estimators : int, default=100\n The number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\ncriterion : {\"gini\", \"entropy\"}, default=\"gini\"\n The function to measure the quality of a split. Supported criteria are\n \"gini\" for the Gini impurity and \"entropy\" for the information gain.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. 
versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : {\"auto\", \"sqrt\", \"log2\"}, int or float, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. 
versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nbootstrap : bool, default=False\n Whether bootstrap samples are used when building trees. If False, the\n whole dataset is used to build each tree.\n\noob_score : bool, default=False\n Whether to use out-of-bag samples to estimate\n the generalization accuracy.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls 3 sources of randomness:\n\n - the bootstrapping of the samples used when building trees\n (if ``bootstrap=True``)\n - the sampling of the features to consider when looking for the best\n split at each node (if ``max_features < n_features``)\n - the draw of the splits for each of the `max_features`\n\n See :term:`Glossary ` for details.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. 
See :term:`the Glossary `.\n\nclass_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts, default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n The \"balanced_subsample\" mode is the same as \"balanced\" except that\n weights are computed based on the bootstrap sample for every tree\n grown.\n\n For multi-output, the weights of each column of y will be multiplied.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n If bootstrap is True, the number of samples to draw from X\n to train each base estimator.\n\n - If None (default), then draw `X.shape[0]` samples.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n .. 
versionadded:: 0.22\n\nAttributes\n----------\nbase_estimator_ : ExtraTreesClassifier\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\nestimators_ : list of DecisionTreeClassifier\n The collection of fitted sub-estimators.\n\nclasses_ : ndarray of shape (n_classes,) or a list of such arrays\n The classes labels (single output problem), or a list of arrays of\n class labels (multi-output problem).\n\nn_classes_ : int or list\n The number of classes (single output problem), or a list containing the\n number of classes for each output (multi-output problem).\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_decision_function_ : ndarray of shape (n_samples, n_classes)\n Decision function computed with out-of-bag estimate on the training\n set. If n_estimators is small it might be possible that a data point\n was never left out during the bootstrap. In this case,\n `oob_decision_function_` might contain NaN. 
This attribute exists\n only when ``oob_score`` is True.\n\nSee Also\n--------\nsklearn.tree.ExtraTreeClassifier : Base classifier for this ensemble.\nRandomForestClassifier : Ensemble Classifier based on trees with optimal\n splits.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized\n trees\", Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.ensemble import ExtraTreesClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_features=4, random_state=0)\n>>> clf = ExtraTreesClassifier(n_estimators=100, random_state=0)\n>>> clf.fit(X, y)\nExtraTreesClassifier(random_state=0)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])" - }, - { - "name": "ExtraTreesRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of trees in the forest. .. versionchanged:: 0.22 The default value of ``n_estimators`` changed from 10 to 100 in 0.22." - }, - { - "name": "criterion", - "type": "Literal[\"mse\", \"mae\"]", - "hasDefault": true, - "default": "\"mse\"", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are \"mse\" for the mean squared error, which is equal to variance reduction as feature selection criterion, and \"mae\" for the mean absolute error. .. versionadded:: 0.18 Mean Absolute Error (MAE) criterion." 
- }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." 
- }, - { - "name": "max_features", - "type": "Literal[\"auto\", \"sqrt\", \"log2\"]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `round(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=n_features`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. 
versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "bootstrap", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree." - }, - { - "name": "oob_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use out-of-bag samples to estimate the R^2 on unseen data." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`, :meth:`decision_path` and :meth:`apply` are all parallelized over the trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls 3 sources of randomness: - the bootstrapping of the samples used when building trees (if ``bootstrap=True``) - the sampling of the features to consider when looking for the best split at each node (if ``max_features < n_features``) - the draw of the splits for each of the `max_features` See :term:`Glossary ` for details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity when fitting and predicting." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`the Glossary `." - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. versionadded:: 0.22" - }, - { - "name": "max_samples", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If bootstrap is True, the number of samples to draw from X to train each base estimator. - If None (default), then draw `X.shape[0]` samples. - If int, then draw `max_samples` samples. - If float, then draw `max_samples * X.shape[0]` samples. Thus, `max_samples` should be in the interval `(0, 1)`. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "An extra-trees regressor.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_estimators : int, default=100\n The number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\ncriterion : {\"mse\", \"mae\"}, default=\"mse\"\n The function to measure the quality of a split. Supported criteria\n are \"mse\" for the mean squared error, which is equal to variance\n reduction as feature selection criterion, and \"mae\" for the mean\n absolute error.\n\n .. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. 
This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : {\"auto\", \"sqrt\", \"log2\"}, int or float, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n 
left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nbootstrap : bool, default=False\n Whether bootstrap samples are used when building trees. If False, the\n whole dataset is used to build each tree.\n\noob_score : bool, default=False\n Whether to use out-of-bag samples to estimate the R^2 on unseen data.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls 3 sources of randomness:\n\n - the bootstrapping of the samples used when building trees\n (if ``bootstrap=True``)\n - the sampling of the features to consider when looking for the best\n split at each node (if ``max_features < n_features``)\n - the draw of the splits for each of the `max_features`\n\n See :term:`Glossary ` for details.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. 
See :term:`the Glossary `.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n If bootstrap is True, the number of samples to draw from X\n to train each base estimator.\n\n - If None (default), then draw `X.shape[0]` samples.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nbase_estimator_ : ExtraTreeRegressor\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\nestimators_ : list of DecisionTreeRegressor\n The collection of fitted sub-estimators.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). 
See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_ : int\n The number of features.\n\nn_outputs_ : int\n The number of outputs.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_prediction_ : ndarray of shape (n_samples,)\n Prediction computed with out-of-bag estimate on the training set.\n This attribute exists only when ``oob_score`` is True.\n\nSee Also\n--------\nsklearn.tree.ExtraTreeRegressor : Base estimator for this ensemble.\nRandomForestRegressor : Ensemble regressor using trees with optimal splits.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.ensemble import ExtraTreesRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, random_state=0)\n>>> reg = ExtraTreesRegressor(n_estimators=100, random_state=0).fit(\n... X_train, y_train)\n>>> reg.score(X_test, y_test)\n0.2708..." - }, - { - "name": "RandomTreesEmbedding", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of trees in the forest. .. 
versionchanged:: 0.22 The default value of ``n_estimators`` changed from 10 to 100 in 0.22." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of each tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` is the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` is the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." 
- }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "sparse_output", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return a sparse CSR matrix, as default behavior, or to return a dense array compatible with dense pipeline operators." 
- }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel. :meth:`fit`, :meth:`transform`, :meth:`decision_path` and :meth:`apply` are all parallelized over the trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the generation of the random `y` used to fit the trees and the draw of the splits for each feature at the trees' nodes. See :term:`Glossary ` for details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity when fitting and predicting." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`the Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Use ``dtype=np.float32`` for maximum efficiency. Sparse matrices are also supported, use sparse ``csc_matrix`` for maximum efficiency." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Splits that would create child nodes with net zero or negative weight are ignored while searching for a split in each node. In the case of classification, splits are also ignored if they would result in any single class carrying a negative weight in either child node." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Use ``dtype=np.float32`` for maximum\n efficiency. Sparse matrices are also supported, use sparse\n ``csc_matrix`` for maximum efficiency.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\nReturns\n-------\nself : object" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data used to build forests. Use ``dtype=np.float32`` for maximum efficiency." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Splits that would create child nodes with net zero or negative weight are ignored while searching for a split in each node. In the case of classification, splits are also ignored if they would result in any single class carrying a negative weight in either child node." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit estimator and transform dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data used to build forests. Use ``dtype=np.float32`` for\n maximum efficiency.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\nReturns\n-------\nX_transformed : sparse matrix of shape (n_samples, n_out)\n Transformed dataset." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data to be transformed. Use ``dtype=np.float32`` for maximum efficiency. Sparse matrices are also supported, use sparse ``csr_matrix`` for maximum efficiency." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data to be transformed. Use ``dtype=np.float32`` for maximum\n efficiency. Sparse matrices are also supported, use sparse\n ``csr_matrix`` for maximum efficiency.\n\nReturns\n-------\nX_transformed : sparse matrix of shape (n_samples, n_out)\n Transformed dataset." - } - ], - "docstring": "An ensemble of totally random trees.\n\nAn unsupervised transformation of a dataset to a high-dimensional\nsparse representation. A datapoint is coded according to which leaf of\neach tree it is sorted into. Using a one-hot encoding of the leaves,\nthis leads to a binary coding with as many ones as there are trees in\nthe forest.\n\nThe dimensionality of the resulting representation is\n``n_out <= n_estimators * max_leaf_nodes``. If ``max_leaf_nodes == None``,\nthe number of leaf nodes is at most ``n_estimators * 2 ** max_depth``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_estimators : int, default=100\n Number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\nmax_depth : int, default=5\n The maximum depth of each tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` is the minimum\n number of samples for each split.\n\n .. 
versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` is the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. 
deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nsparse_output : bool, default=True\n Whether or not to return a sparse CSR matrix, as default behavior,\n or to return a dense array compatible with dense pipeline operators.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`transform`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the generation of the random `y` used to fit the trees\n and the draw of the splits for each feature at the trees' nodes.\n See :term:`Glossary ` for details.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. 
See :term:`the Glossary `.\n\nAttributes\n----------\nbase_estimator_ : DecisionTreeClassifier instance\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\nestimators_ : list of DecisionTreeClassifier instances\n The collection of fitted sub-estimators.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The feature importances (the higher, the more important the feature).\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\none_hot_encoder_ : OneHotEncoder instance\n One-hot encoder used to create the sparse embedding.\n\nReferences\n----------\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n Machine Learning, 63(1), 3-42, 2006.\n.. [2] Moosmann, F. and Triggs, B. and Jurie, F. \"Fast discriminative\n visual codebooks using randomized clustering forests\"\n NIPS 2007\n\nExamples\n--------\n>>> from sklearn.ensemble import RandomTreesEmbedding\n>>> X = [[0,0], [1,0], [0,1], [-1,0], [0,-1]]\n>>> random_trees = RandomTreesEmbedding(\n... n_estimators=5, random_state=0, max_depth=1).fit(X)\n>>> X_sparse_embedding = random_trees.transform(X)\n>>> X_sparse_embedding.toarray()\narray([[0., 1., 1., 0., 1., 0., 0., 1., 1., 0.],\n [0., 1., 1., 0., 1., 0., 0., 1., 1., 0.],\n [0., 1., 0., 1., 0., 1., 0., 1., 0., 1.],\n [1., 0., 1., 0., 1., 0., 1., 0., 1., 0.],\n [0., 1., 1., 0., 1., 0., 0., 1., 1., 0.]])" - } - ], - "functions": [ - { - "name": "_get_n_samples_bootstrap", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples in the dataset." 
- }, - { - "name": "max_samples", - "type": "Union[float, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of samples to draw from the total available: - if float, this indicates a fraction of the total and should be the interval `(0, 1)`; - if int, this indicates the exact number of samples; - if None, this indicates the total number of samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get the number of samples in a bootstrap sample.\n\nParameters\n----------\nn_samples : int\n Number of samples in the dataset.\nmax_samples : int or float\n The maximum number of samples to draw from the total available:\n - if float, this indicates a fraction of the total and should be\n the interval `(0, 1)`;\n - if int, this indicates the exact number of samples;\n - if None, this indicates the total number of samples.\n\nReturns\n-------\nn_samples_bootstrap : int\n The total number of samples to draw for the bootstrap sample." - }, - { - "name": "_generate_sample_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to _parallel_build_trees function." - }, - { - "name": "_generate_unsampled_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to forest._set_oob_score function." - }, - { - "name": "_parallel_build_trees", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function used to fit a single tree in parallel." 
- }, - { - "name": "_accumulate_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "This is a utility function for joblib's Parallel.\n\nIt can't go locally in ForestClassifier or ForestRegressor, because joblib\ncomplains that it cannot pickle it when placed there." - } - ] - }, - { - "name": "sklearn.ensemble._gb", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "import warnings", - "from _base import BaseEnsemble", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import BaseEstimator", - "from base import is_classifier", - "from utils import deprecated", - "from _gradient_boosting import predict_stages", - "from _gradient_boosting import predict_stage", - "from _gradient_boosting import _random_sample_mask", - "import numbers", - "import numpy as np", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import issparse", - "from time import time", - "from model_selection import train_test_split", - "from tree import DecisionTreeRegressor", - "from tree._tree import DTYPE", - "from tree._tree import DOUBLE", - "from None import _gb_losses", - "from utils import check_random_state", - "from utils import check_array", - "from utils import column_or_1d", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.multiclass import check_classification_targets", - "from exceptions import NotFittedError", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "VerboseReporter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level. 
If ``verbose==1`` output is printed once in a while (when iteration mod verbose_mod is zero).; if larger than 1 then output is printed for each update." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "init", - "decorators": [], - "parameters": [ - { - "name": "est", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator" - }, - { - "name": "begin_at_stage", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "stage at which to begin reporting" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialize reporter\n\nParameters\n----------\nest : Estimator\n The estimator\n\nbegin_at_stage : int, default=0\n stage at which to begin reporting" - }, - { - "name": "update", - "decorators": [], - "parameters": [ - { - "name": "j", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The new iteration." - }, - { - "name": "est", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update reporter with new iteration.\n\nParameters\n----------\nj : int\n The new iteration.\nest : Estimator\n The estimator." - } - ], - "docstring": "Reports verbose output to stdout.\n\nParameters\n----------\nverbose : int\n Verbosity level. If ``verbose==1`` output is printed once in a while\n (when iteration mod verbose_mod is zero).; if larger than 1 then output\n is printed for each update." 
- }, - { - "name": "BaseGradientBoosting", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Called by fit to validate y." - }, - { - "name": "_fit_stage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit another stage of ``_n_classes`` trees to the boosting model." - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check validity of parameters and raise ValueError if not valid." - }, - { - "name": "_init_state", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialize model state and allocate model state data structures. " - }, - { - "name": "_clear_state", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Clear the state of the gradient boosting model. " - }, - { - "name": "_resize_state", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Add additional ``n_estimators`` entries to all attributes." - }, - { - "name": "_is_initialized", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_initialized", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that the estimator is initialized, raising an error if not." 
- }, - { - "name": "_warn_mae_for_criterion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values (strings or integers in classification, real numbers in regression) For classification, labels must correspond to classes." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Splits that would create child nodes with net zero or negative weight are ignored while searching for a split in each node. In the case of classification, splits are also ignored if they would result in any single class carrying a negative weight in either child node." - }, - { - "name": "monitor", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The monitor is called after each iteration with the current iteration, a reference to the estimator and the local variables of ``_fit_stages`` as keyword arguments ``callable(i, self, locals())``. If the callable returns ``True`` the fitting procedure is stopped. The monitor can be used for various things such as computing held-out estimates, early stopping, model introspect, and snapshoting." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the gradient boosting model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\ny : array-like of shape (n_samples,)\n Target values (strings or integers in classification, real numbers\n in regression)\n For classification, labels must correspond to classes.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\nmonitor : callable, default=None\n The monitor is called after each iteration with the current\n iteration, a reference to the estimator and the local variables of\n ``_fit_stages`` as keyword arguments ``callable(i, self,\n locals())``. If the callable returns ``True`` the fitting procedure\n is stopped. The monitor can be used for various things such as\n computing held-out estimates, early stopping, model introspect, and\n snapshoting.\n\nReturns\n-------\nself : object" - }, - { - "name": "_fit_stages", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Iteratively fits the stages.\n\nFor each stage it computes the progress (OOB, train score)\nand delegates to ``_fit_stage``.\nReturns the number of stages fit; might differ from ``n_estimators``\ndue to early stopping." 
- }, - { - "name": "_make_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_raw_predict_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check input and compute raw predictions of the init estimator." - }, - { - "name": "_raw_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the sum of the trees raw predictions (+ init estimator)." - }, - { - "name": "_staged_raw_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute raw predictions of ``X`` for each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nraw_predictions : generator of ndarray of shape (n_samples, k)\n The raw predictions of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n Regression and binary classification are special cases with\n ``k == 1``, otherwise ``k==n_classes``." 
- }, - { - "name": "feature_importances_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The impurity-based feature importances.\n\nThe higher, the more important the feature.\nThe importance of a feature is computed as the (normalized)\ntotal reduction of the criterion brought by that feature. It is also\nknown as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). See\n:func:`sklearn.inspection.permutation_importance` as an alternative.\n\nReturns\n-------\nfeature_importances_ : ndarray of shape (n_features,)\n The values of this array sum to 1, unless all trees are single node\n trees consisting of only the root node, in which case it will be an\n array of zeros." - }, - { - "name": "_compute_partial_dependence_recursion", - "decorators": [], - "parameters": [ - { - "name": "grid", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The grid points on which the partial dependence should be evaluated." - }, - { - "name": "target_features", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of target features for which the partial dependence should be evaluated." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fast partial dependence computation.\n\nParameters\n----------\ngrid : ndarray of shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\ntarget_features : ndarray of shape (n_target_features,)\n The set of target features for which the partial dependence\n should be evaluated.\n\nReturns\n-------\naveraged_predictions : ndarray of shape (n_trees_per_iteration, n_samples)\n The value of the partial dependence function on each grid point." 
- }, - { - "name": "apply", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, its dtype will be converted to ``dtype=np.float32``. If a sparse matrix is provided, it will be converted to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply trees in the ensemble to X, return leaf indices.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will\n be converted to a sparse ``csr_matrix``.\n\nReturns\n-------\nX_leaves : array-like of shape (n_samples, n_estimators, n_classes)\n For each datapoint x in X and for each tree in the ensemble,\n return the index of the leaf x ends up in each estimator.\n In the case of binary classification n_classes is 1." - } - ], - "docstring": "Abstract base class for Gradient Boosting." - }, - { - "name": "GradientBoostingClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "loss", - "type": "Literal['deviance', 'exponential']", - "hasDefault": true, - "default": "'deviance'", - "limitation": null, - "ignored": false, - "docstring": "The loss function to be optimized. 'deviance' refers to deviance (= logistic regression) for classification with probabilistic outputs. For loss 'exponential' gradient boosting recovers the AdaBoost algorithm." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Learning rate shrinks the contribution of each tree by `learning_rate`. There is a trade-off between learning_rate and n_estimators." 
- }, - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance." - }, - { - "name": "subsample", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. `subsample` interacts with the parameter `n_estimators`. Choosing `subsample < 1.0` leads to a reduction of variance and an increase in bias." - }, - { - "name": "criterion", - "type": "Literal['friedman_mse', 'mse', 'mae']", - "hasDefault": true, - "default": "'friedman_mse'", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are 'friedman_mse' for the mean squared error with improvement score by Friedman, 'mse' for mean squared error, and 'mae' for the mean absolute error. The default value of 'friedman_mse' is generally the best as it can provide a better approximation in some cases. .. versionadded:: 0.18 .. deprecated:: 0.24 `criterion='mae'` is deprecated and will be removed in version 1.1 (renaming of 0.26). Use `criterion='friedman_mse'` or `'mse'` instead, as trees should use a least-square criterion in Gradient Boosting." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. 
versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the individual regression estimators. The maximum depth limits the number of nodes in the tree. Tune this parameter for best performance; the best value depends on the interaction of the input variables." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. 
The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "init", - "type": "Literal['zero']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator object that is used to compute the initial predictions. ``init`` has to provide :meth:`fit` and :meth:`predict_proba`. If 'zero', the initial raw predictions are set to zero. By default, a ``DummyEstimator`` predicting the classes priors is used." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the random seed given to each Tree estimator at each boosting iteration. In addition, it controls the random permutation of the features at each split (see Notes for more details). It also controls the random spliting of the training data to obtain a validation set if `n_iter_no_change` is not None. 
Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "max_features", - "type": "Literal['auto', 'sqrt', 'log2']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `int(max_features * n_features)` features are considered at each split. - If 'auto', then `max_features=sqrt(n_features)`. - If 'sqrt', then `max_features=sqrt(n_features)`. - If 'log2', then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Choosing `max_features < n_features` leads to a reduction of variance and an increase in bias. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. If 1 then it prints progress and performance once in a while (the more trees the lower the frequency). If greater than 1 then it prints progress and performance for every tree." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just erase the previous solution. 
See :term:`the Glossary `." - }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if ``n_iter_no_change`` is set to an integer. .. versionadded:: 0.20" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "``n_iter_no_change`` is used to decide if early stopping will be used to terminate training when validation score is not improving. By default it is set to None to disable early stopping. If set to a number, it will set aside ``validation_fraction`` size of the training data as validation and terminate training when validation score is not improving in all of the previous ``n_iter_no_change`` numbers of iterations. The split is stratified. .. versionadded:: 0.20" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for the early stopping. When the loss is not improving by at least tol for ``n_iter_no_change`` iterations (if set to a number), the training stops. .. versionadded:: 0.20" - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_warn_mae_for_criterion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the decision function of ``X``.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nscore : ndarray of shape (n_samples, n_classes) or (n_samples,)\n The decision function of the input samples, which corresponds to\n the raw values predicted from the trees of the ensemble . The\n order of the classes corresponds to that in the attribute\n :term:`classes_`. Regression and binary classification produce an\n array of shape (n_samples,)." - }, - { - "name": "staged_decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute decision function of ``X`` for each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nscore : generator of ndarray of shape (n_samples, k)\n The decision function of the input samples, which corresponds to\n the raw values predicted from the trees of the ensemble . The\n classes corresponds to that in the attribute :term:`classes_`.\n Regression and binary classification are special cases with\n ``k == 1``, otherwise ``k==n_classes``." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted values." - }, - { - "name": "staged_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Predict class at each stage for X.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted value of the input samples." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nRaises\n------\nAttributeError\n If the ``loss`` does not support probabilities.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class log-probabilities for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nRaises\n------\nAttributeError\n If the ``loss`` does not support probabilities.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class log-probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "staged_predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Predict class probabilities at each stage for X.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted value of the input samples." - } - ], - "docstring": "Gradient Boosting for classification.\n\nGB builds an additive model in a\nforward stage-wise fashion; it allows for the optimization of\narbitrary differentiable loss functions. In each stage ``n_classes_``\nregression trees are fit on the negative gradient of the\nbinomial or multinomial deviance loss function. 
Binary classification\nis a special case where only a single regression tree is induced.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nloss : {'deviance', 'exponential'}, default='deviance'\n The loss function to be optimized. 'deviance' refers to\n deviance (= logistic regression) for classification\n with probabilistic outputs. For loss 'exponential' gradient\n boosting recovers the AdaBoost algorithm.\n\nlearning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by `learning_rate`.\n There is a trade-off between learning_rate and n_estimators.\n\nn_estimators : int, default=100\n The number of boosting stages to perform. Gradient boosting\n is fairly robust to over-fitting so a large number usually\n results in better performance.\n\nsubsample : float, default=1.0\n The fraction of samples to be used for fitting the individual base\n learners. If smaller than 1.0 this results in Stochastic Gradient\n Boosting. `subsample` interacts with the parameter `n_estimators`.\n Choosing `subsample < 1.0` leads to a reduction of variance\n and an increase in bias.\n\ncriterion : {'friedman_mse', 'mse', 'mae'}, default='friedman_mse'\n The function to measure the quality of a split. Supported criteria\n are 'friedman_mse' for the mean squared error with improvement\n score by Friedman, 'mse' for mean squared error, and 'mae' for\n the mean absolute error. The default value of 'friedman_mse' is\n generally the best as it can provide a better approximation in\n some cases.\n\n .. versionadded:: 0.18\n .. deprecated:: 0.24\n `criterion='mae'` is deprecated and will be removed in version\n 1.1 (renaming of 0.26). 
Use `criterion='friedman_mse'` or `'mse'`\n instead, as trees should use a least-square criterion in\n Gradient Boosting.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_depth : int, default=3\n The maximum depth of the individual regression estimators. The maximum\n depth limits the number of nodes in the tree. 
Tune this parameter\n for best performance; the best value depends on the interaction\n of the input variables.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\ninit : estimator or 'zero', default=None\n An estimator object that is used to compute the initial predictions.\n ``init`` has to provide :meth:`fit` and :meth:`predict_proba`. If\n 'zero', the initial raw predictions are set to zero. 
By default, a\n ``DummyEstimator`` predicting the classes priors is used.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random seed given to each Tree estimator at each\n boosting iteration.\n In addition, it controls the random permutation of the features at\n each split (see Notes for more details).\n It also controls the random spliting of the training data to obtain a\n validation set if `n_iter_no_change` is not None.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nmax_features : {'auto', 'sqrt', 'log2'}, int or float, default=None\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If 'auto', then `max_features=sqrt(n_features)`.\n - If 'sqrt', then `max_features=sqrt(n_features)`.\n - If 'log2', then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Choosing `max_features < n_features` leads to a reduction of variance\n and an increase in bias.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nverbose : int, default=0\n Enable verbose output. If 1 then it prints progress and performance\n once in a while (the more trees the lower the frequency). 
If greater\n than 1 then it prints progress and performance for every tree.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just erase the\n previous solution. See :term:`the Glossary `.\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if ``n_iter_no_change`` is set to an integer.\n\n .. versionadded:: 0.20\n\nn_iter_no_change : int, default=None\n ``n_iter_no_change`` is used to decide if early stopping will be used\n to terminate training when validation score is not improving. By\n default it is set to None to disable early stopping. If set to a\n number, it will set aside ``validation_fraction`` size of the training\n data as validation and terminate training when validation score is not\n improving in all of the previous ``n_iter_no_change`` numbers of\n iterations. The split is stratified.\n\n .. versionadded:: 0.20\n\ntol : float, default=1e-4\n Tolerance for the early stopping. When the loss is not improving\n by at least tol for ``n_iter_no_change`` iterations (if set to a\n number), the training stops.\n\n .. versionadded:: 0.20\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nn_estimators_ : int\n The number of estimators as selected by early stopping (if\n ``n_iter_no_change`` is specified). 
Otherwise it is set to\n ``n_estimators``.\n\n .. versionadded:: 0.20\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\noob_improvement_ : ndarray of shape (n_estimators,)\n The improvement in loss (= deviance) on the out-of-bag samples\n relative to the previous iteration.\n ``oob_improvement_[0]`` is the improvement in\n loss of the first stage over the ``init`` estimator.\n Only available if ``subsample < 1.0``\n\ntrain_score_ : ndarray of shape (n_estimators,)\n The i-th score ``train_score_[i]`` is the deviance (= loss) of the\n model at iteration ``i`` on the in-bag sample.\n If ``subsample == 1`` this is the deviance on the training data.\n\nloss_ : LossFunction\n The concrete ``LossFunction`` object.\n\ninit_ : estimator\n The estimator that provides the initial predictions.\n Set via the ``init`` argument or ``loss.init_estimator``.\n\nestimators_ : ndarray of DecisionTreeRegressor of shape (n_estimators, ``loss_.K``)\n The collection of fitted sub-estimators. 
``loss_.K`` is 1 for binary\n classification, otherwise n_classes.\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\nn_features_ : int\n The number of data features.\n\nn_classes_ : int\n The number of classes.\n\nmax_features_ : int\n The inferred value of max_features.\n\nSee Also\n--------\nHistGradientBoostingClassifier : Histogram-based Gradient Boosting\n Classification Tree.\nsklearn.tree.DecisionTreeClassifier : A decision tree classifier.\nRandomForestClassifier : A meta-estimator that fits a number of decision\n tree classifiers on various sub-samples of the dataset and uses\n averaging to improve the predictive accuracy and control over-fitting.\nAdaBoostClassifier : A meta-estimator that begins by fitting a classifier\n on the original dataset and then fits additional copies of the\n classifier on the same dataset where the weights of incorrectly\n classified instances are adjusted such that subsequent classifiers\n focus more on difficult cases.\n\nNotes\n-----\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data and\n``max_features=n_features``, if the improvement of the criterion is\nidentical for several splits enumerated during the search of the best\nsplit. To obtain a deterministic behaviour during fitting,\n``random_state`` has to be fixed.\n\nReferences\n----------\nJ. Friedman, Greedy Function Approximation: A Gradient Boosting\nMachine, The Annals of Statistics, Vol. 29, No. 5, 2001.\n\nJ. Friedman, Stochastic Gradient Boosting, 1999\n\nT. Hastie, R. Tibshirani and J. Friedman.\nElements of Statistical Learning Ed. 
2, Springer, 2009.\n\nExamples\n--------\nThe following example shows how to fit a gradient boosting classifier with\n100 decision stumps as weak learners.\n\n>>> from sklearn.datasets import make_hastie_10_2\n>>> from sklearn.ensemble import GradientBoostingClassifier\n\n>>> X, y = make_hastie_10_2(random_state=0)\n>>> X_train, X_test = X[:2000], X[2000:]\n>>> y_train, y_test = y[:2000], y[2000:]\n\n>>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,\n... max_depth=1, random_state=0).fit(X_train, y_train)\n>>> clf.score(X_test, y_test)\n0.913..." - }, - { - "name": "GradientBoostingRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "loss", - "type": "Literal['ls', 'lad', 'huber', 'quantile']", - "hasDefault": true, - "default": "'ls'", - "limitation": null, - "ignored": false, - "docstring": "Loss function to be optimized. 'ls' refers to least squares regression. 'lad' (least absolute deviation) is a highly robust loss function solely based on order information of the input variables. 'huber' is a combination of the two. 'quantile' allows quantile regression (use `alpha` to specify the quantile)." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Learning rate shrinks the contribution of each tree by `learning_rate`. There is a trade-off between learning_rate and n_estimators." - }, - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance." 
- }, - { - "name": "subsample", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. `subsample` interacts with the parameter `n_estimators`. Choosing `subsample < 1.0` leads to a reduction of variance and an increase in bias." - }, - { - "name": "criterion", - "type": "Literal['friedman_mse', 'mse', 'mae']", - "hasDefault": true, - "default": "'friedman_mse'", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are \"friedman_mse\" for the mean squared error with improvement score by Friedman, \"mse\" for mean squared error, and \"mae\" for the mean absolute error. The default value of \"friedman_mse\" is generally the best as it can provide a better approximation in some cases. .. versionadded:: 0.18 .. deprecated:: 0.24 `criterion='mae'` is deprecated and will be removed in version 1.1 (renaming of 0.26). The correct way of minimizing the absolute error is to use `loss='lad'` instead." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. 
A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Maximum depth of the individual regression estimators. The maximum depth limits the number of nodes in the tree. Tune this parameter for best performance; the best value depends on the interaction of the input variables." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. 
versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "init", - "type": "Literal['zero']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator object that is used to compute the initial predictions. ``init`` has to provide :term:`fit` and :term:`predict`. If 'zero', the initial raw predictions are set to zero. By default a ``DummyEstimator`` is used, predicting either the average target value (for loss='ls'), or a quantile for the other losses." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the random seed given to each Tree estimator at each boosting iteration. In addition, it controls the random permutation of the features at each split (see Notes for more details). It also controls the random spliting of the training data to obtain a validation set if `n_iter_no_change` is not None. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "max_features", - "type": "Literal['auto', 'sqrt', 'log2']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. 
- If float, then `max_features` is a fraction and `int(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=n_features`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Choosing `max_features < n_features` leads to a reduction of variance and an increase in bias. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The alpha-quantile of the huber loss function and the quantile loss function. Only if ``loss='huber'`` or ``loss='quantile'``." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. If 1 then it prints progress and performance once in a while (the more trees the lower the frequency). If greater than 1 then it prints progress and performance for every tree." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just erase the previous solution. See :term:`the Glossary `." 
- }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if ``n_iter_no_change`` is set to an integer. .. versionadded:: 0.20" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "``n_iter_no_change`` is used to decide if early stopping will be used to terminate training when validation score is not improving. By default it is set to None to disable early stopping. If set to a number, it will set aside ``validation_fraction`` size of the training data as validation and terminate training when validation score is not improving in all of the previous ``n_iter_no_change`` numbers of iterations. .. versionadded:: 0.20" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for the early stopping. When the loss is not improving by at least tol for ``n_iter_no_change`` iterations (if set to a number), the training stops. .. versionadded:: 0.20" - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_warn_mae_for_criterion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict regression target for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted values." - }, - { - "name": "staged_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Predict regression target at each stage for X.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. 
Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted value of the input samples." - }, - { - "name": "apply", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, its dtype will be converted to ``dtype=np.float32``. If a sparse matrix is provided, it will be converted to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply trees in the ensemble to X, return leaf indices.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will\n be converted to a sparse ``csr_matrix``.\n\nReturns\n-------\nX_leaves : array-like of shape (n_samples, n_estimators)\n For each datapoint x in X and for each tree in the ensemble,\n return the index of the leaf x ends up in each estimator." - }, - { - "name": "n_classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Gradient Boosting for regression.\n\nGB builds an additive model in a forward stage-wise fashion;\nit allows for the optimization of arbitrary differentiable loss functions.\nIn each stage a regression tree is fit on the negative gradient of the\ngiven loss function.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nloss : {'ls', 'lad', 'huber', 'quantile'}, default='ls'\n Loss function to be optimized. 'ls' refers to least squares\n regression. 'lad' (least absolute deviation) is a highly robust\n loss function solely based on order information of the input\n variables. 
'huber' is a combination of the two. 'quantile'\n allows quantile regression (use `alpha` to specify the quantile).\n\nlearning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by `learning_rate`.\n There is a trade-off between learning_rate and n_estimators.\n\nn_estimators : int, default=100\n The number of boosting stages to perform. Gradient boosting\n is fairly robust to over-fitting so a large number usually\n results in better performance.\n\nsubsample : float, default=1.0\n The fraction of samples to be used for fitting the individual base\n learners. If smaller than 1.0 this results in Stochastic Gradient\n Boosting. `subsample` interacts with the parameter `n_estimators`.\n Choosing `subsample < 1.0` leads to a reduction of variance\n and an increase in bias.\n\ncriterion : {'friedman_mse', 'mse', 'mae'}, default='friedman_mse'\n The function to measure the quality of a split. Supported criteria\n are \"friedman_mse\" for the mean squared error with improvement\n score by Friedman, \"mse\" for mean squared error, and \"mae\" for\n the mean absolute error. The default value of \"friedman_mse\" is\n generally the best as it can provide a better approximation in\n some cases.\n\n .. versionadded:: 0.18\n .. deprecated:: 0.24\n `criterion='mae'` is deprecated and will be removed in version\n 1.1 (renaming of 0.26). The correct way of minimizing the absolute\n error is to use `loss='lad'` instead.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. 
versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_depth : int, default=3\n Maximum depth of the individual regression estimators. The maximum\n depth limits the number of nodes in the tree. Tune this parameter\n for best performance; the best value depends on the interaction\n of the input variables.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. 
A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\ninit : estimator or 'zero', default=None\n An estimator object that is used to compute the initial predictions.\n ``init`` has to provide :term:`fit` and :term:`predict`. If 'zero', the\n initial raw predictions are set to zero. By default a\n ``DummyEstimator`` is used, predicting either the average target value\n (for loss='ls'), or a quantile for the other losses.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random seed given to each Tree estimator at each\n boosting iteration.\n In addition, it controls the random permutation of the features at\n each split (see Notes for more details).\n It also controls the random spliting of the training data to obtain a\n validation set if `n_iter_no_change` is not None.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nmax_features : {'auto', 'sqrt', 'log2'}, int or float, default=None\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Choosing `max_features < n_features` leads to a reduction of variance\n and an increase in bias.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it 
requires to\n effectively inspect more than ``max_features`` features.\n\nalpha : float, default=0.9\n The alpha-quantile of the huber loss function and the quantile\n loss function. Only if ``loss='huber'`` or ``loss='quantile'``.\n\nverbose : int, default=0\n Enable verbose output. If 1 then it prints progress and performance\n once in a while (the more trees the lower the frequency). If greater\n than 1 then it prints progress and performance for every tree.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just erase the\n previous solution. See :term:`the Glossary `.\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if ``n_iter_no_change`` is set to an integer.\n\n .. versionadded:: 0.20\n\nn_iter_no_change : int, default=None\n ``n_iter_no_change`` is used to decide if early stopping will be used\n to terminate training when validation score is not improving. By\n default it is set to None to disable early stopping. If set to a\n number, it will set aside ``validation_fraction`` size of the training\n data as validation and terminate training when validation score is not\n improving in all of the previous ``n_iter_no_change`` numbers of\n iterations.\n\n .. versionadded:: 0.20\n\ntol : float, default=1e-4\n Tolerance for the early stopping. When the loss is not improving\n by at least tol for ``n_iter_no_change`` iterations (if set to a\n number), the training stops.\n\n .. versionadded:: 0.20\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. 
The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\noob_improvement_ : ndarray of shape (n_estimators,)\n The improvement in loss (= deviance) on the out-of-bag samples\n relative to the previous iteration.\n ``oob_improvement_[0]`` is the improvement in\n loss of the first stage over the ``init`` estimator.\n Only available if ``subsample < 1.0``\n\ntrain_score_ : ndarray of shape (n_estimators,)\n The i-th score ``train_score_[i]`` is the deviance (= loss) of the\n model at iteration ``i`` on the in-bag sample.\n If ``subsample == 1`` this is the deviance on the training data.\n\nloss_ : LossFunction\n The concrete ``LossFunction`` object.\n\ninit_ : estimator\n The estimator that provides the initial predictions.\n Set via the ``init`` argument or ``loss.init_estimator``.\n\nestimators_ : ndarray of DecisionTreeRegressor of shape (n_estimators, 1)\n The collection of fitted sub-estimators.\n\nn_classes_ : int\n The number of classes, set to 1 for regressors.\n\n .. deprecated:: 0.24\n Attribute ``n_classes_`` was deprecated in version 0.24 and\n will be removed in 1.1 (renaming of 0.26).\n\nn_estimators_ : int\n The number of estimators as selected by early stopping (if\n ``n_iter_no_change`` is specified). 
Otherwise it is set to\n ``n_estimators``.\n\nn_features_ : int\n The number of data features.\n\nmax_features_ : int\n The inferred value of max_features.\n\nSee Also\n--------\nHistGradientBoostingRegressor : Histogram-based Gradient Boosting\n Classification Tree.\nsklearn.tree.DecisionTreeRegressor : A decision tree regressor.\nsklearn.tree.RandomForestRegressor : A random forest regressor.\n\nNotes\n-----\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data and\n``max_features=n_features``, if the improvement of the criterion is\nidentical for several splits enumerated during the search of the best\nsplit. To obtain a deterministic behaviour during fitting,\n``random_state`` has to be fixed.\n\nExamples\n--------\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.ensemble import GradientBoostingRegressor\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_regression(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, random_state=0)\n>>> reg = GradientBoostingRegressor(random_state=0)\n>>> reg.fit(X_train, y_train)\nGradientBoostingRegressor(random_state=0)\n>>> reg.predict(X_test[1:2])\narray([-61...])\n>>> reg.score(X_test, y_test)\n0.4...\n\nReferences\n----------\nJ. Friedman, Greedy Function Approximation: A Gradient Boosting\nMachine, The Annals of Statistics, Vol. 29, No. 5, 2001.\n\nJ. Friedman, Stochastic Gradient Boosting, 1999\n\nT. Hastie, R. Tibshirani and J. Friedman.\nElements of Statistical Learning Ed. 2, Springer, 2009." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.ensemble._gb_losses", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numpy as np", - "from scipy.special import expit", - "from scipy.special import logsumexp", - "from tree._tree import TREE_LEAF", - "from utils.stats import _weighted_percentile", - "from dummy import DummyClassifier", - "from dummy import DummyRegressor" - ], - "classes": [ - { - "name": "LossFunction", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_classes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of classes." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "init_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Default ``init`` estimator for loss function. " - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves)." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the loss.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves).\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." - }, - { - "name": "negative_gradient", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the negative gradient.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``." - }, - { - "name": "update_terminal_regions", - "decorators": [], - "parameters": [ - { - "name": "tree", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The tree object." - }, - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data array." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target labels." - }, - { - "name": "residual", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The residuals (usually the negative gradient)." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." 
- }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weight of each sample." - }, - { - "name": "sample_mask", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sample mask to be used." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Learning rate shrinks the contribution of each tree by ``learning_rate``." - }, - { - "name": "k", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The index of the estimator being updated." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update the terminal regions (=leaves) of the given tree and\nupdates the current predictions of the model. Traverses tree\nand invokes template method `_update_terminal_region`.\n\nParameters\n----------\ntree : tree.Tree\n The tree object.\nX : ndarray of shape (n_samples, n_features)\n The data array.\ny : ndarray of shape (n_samples,)\n The target labels.\nresidual : ndarray of shape (n_samples,)\n The residuals (usually the negative gradient).\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\nsample_weight : ndarray of shape (n_samples,)\n The weight of each sample.\nsample_mask : ndarray of shape (n_samples,)\n The sample mask to be used.\nlearning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by\n ``learning_rate``.\nk : int, default=0\n The index of the estimator being updated." 
- }, - { - "name": "_update_terminal_region", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Template method for updating terminal regions (i.e., leaves)." - }, - { - "name": "get_init_raw_predictions", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data array." - }, - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator to use to compute the predictions." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the initial raw predictions.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The data array.\nestimator : object\n The estimator to use to compute the predictions.\n\nReturns\n-------\nraw_predictions : ndarray of shape (n_samples, K)\n The initial raw predictions. K is equal to 1 for binary\n classification and regression, and equal to the number of classes\n for multiclass classification. ``raw_predictions`` is casted\n into float64." - } - ], - "docstring": "Abstract base class for various loss functions.\n\nParameters\n----------\nn_classes : int\n Number of classes.\n\nAttributes\n----------\nK : int\n The number of regression trees to be induced;\n 1 for regression and binary classification;\n ``n_classes`` for multi-class classification." 
- }, - { - "name": "RegressionLossFunction", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_init_estimator", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The init estimator to check." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure estimator has the required fit and predict methods.\n\nParameters\n----------\nestimator : object\n The init estimator to check." - }, - { - "name": "get_init_raw_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for regression loss functions." - }, - { - "name": "LeastSquaresError", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_classes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of classes." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "init_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves)." 
- }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the least squares loss.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves).\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." - }, - { - "name": "negative_gradient", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute half of the negative gradient.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples,)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``." - }, - { - "name": "update_terminal_regions", - "decorators": [], - "parameters": [ - { - "name": "tree", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The tree object." - }, - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data array." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target labels." 
- }, - { - "name": "residual", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The residuals (usually the negative gradient)." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weight of each sample." - }, - { - "name": "sample_mask", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sample mask to be used." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Learning rate shrinks the contribution of each tree by ``learning_rate``." - }, - { - "name": "k", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The index of the estimator being updated." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Least squares does not need to update terminal regions.\n\nBut it has to update the predictions.\n\nParameters\n----------\ntree : tree.Tree\n The tree object.\nX : ndarray of shape (n_samples, n_features)\n The data array.\ny : ndarray of shape (n_samples,)\n The target labels.\nresidual : ndarray of shape (n_samples,)\n The residuals (usually the negative gradient).\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\nsample_weight : ndarray of shape (n,)\n The weight of each sample.\nsample_mask : ndarray of shape (n,)\n The sample mask to be used.\nlearning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by\n ``learning_rate``.\nk : int, default=0\n The index of the estimator being updated." - }, - { - "name": "_update_terminal_region", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Loss function for least squares (LS) estimation.\nTerminal regions do not need to be updated for least squares.\n\nParameters\n----------\nn_classes : int\n Number of classes." - }, - { - "name": "LeastAbsoluteError", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_classes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of classes" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "init_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves)." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the least absolute error.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves).\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." - }, - { - "name": "negative_gradient", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the negative gradient.\n\n1.0 if y - raw_predictions > 0.0 else -1.0\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``." - }, - { - "name": "_update_terminal_region", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "LAD updates terminal regions to median estimates." - } - ], - "docstring": "Loss function for least absolute deviation (LAD) regression.\n\nParameters\n----------\nn_classes : int\n Number of classes" - }, - { - "name": "HuberLossFunction", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Percentile at which to extract score." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "init_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Huber loss.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." - }, - { - "name": "negative_gradient", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the negative gradient.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." - }, - { - "name": "_update_terminal_region", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Huber loss function for robust regression.\n\nM-Regression proposed in Friedman 2001.\n\nParameters\n----------\nalpha : float, default=0.9\n Percentile at which to extract score.\n\nReferences\n----------\nJ. Friedman, Greedy Function Approximation: A Gradient Boosting\nMachine, The Annals of Statistics, Vol. 29, No. 5, 2001." - }, - { - "name": "QuantileLossFunction", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The percentile." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "init_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble." 
- }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Quantile loss.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." - }, - { - "name": "negative_gradient", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the negative gradient.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``." - }, - { - "name": "_update_terminal_region", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Loss function for quantile regression.\n\nQuantile regression allows to estimate the percentiles\nof the conditional distribution of the target.\n\nParameters\n----------\nalpha : float, default=0.9\n The percentile." 
- }, - { - "name": "ClassificationLossFunction", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_raw_prediction_to_proba", - "decorators": [], - "parameters": [ - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Template method to convert raw predictions into probabilities.\n\nParameters\n----------\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nReturns\n-------\nprobas : ndarray of shape (n_samples, K)\n The predicted probabilities." - }, - { - "name": "_raw_prediction_to_decision", - "decorators": [], - "parameters": [ - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Template method to convert raw predictions to decisions.\n\nParameters\n----------\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nReturns\n-------\nencoded_predictions : ndarray of shape (n_samples, K)\n The predicted encoded labels." - }, - { - "name": "check_init_estimator", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The init estimator to check." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure estimator has fit and predict_proba methods.\n\nParameters\n----------\nestimator : object\n The init estimator to check." - } - ], - "docstring": "Base class for classification loss functions. " - }, - { - "name": "BinomialDeviance", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_classes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of classes." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "init_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the deviance (= 2 * negative log-likelihood).\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." 
- }, - { - "name": "negative_gradient", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute half of the negative gradient.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``." - }, - { - "name": "_update_terminal_region", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make a single Newton-Raphson step.\n\nour node estimate is given by:\n\n sum(w * (y - prob)) / sum(w * prob * (1 - prob))\n\nwe take advantage that: y - prob = residual" - }, - { - "name": "_raw_prediction_to_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_raw_prediction_to_decision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_init_raw_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Binomial deviance loss function for binary classification.\n\nBinary classification is a special case; here, we only need to\nfit one tree instead of ``n_classes`` trees.\n\nParameters\n----------\nn_classes : int\n Number of classes." 
- }, - { - "name": "MultinomialDeviance", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_classes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of classes." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "init_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Multinomial deviance.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." - }, - { - "name": "negative_gradient", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target labels." 
- }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - }, - { - "name": "k", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The index of the class." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute negative gradient for the ``k``-th class.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n\nk : int, default=0\n The index of the class." - }, - { - "name": "_update_terminal_region", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make a single Newton-Raphson step. " - }, - { - "name": "_raw_prediction_to_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_raw_prediction_to_decision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_init_raw_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Multinomial deviance loss function for multi-class classification.\n\nFor multi-class classification we need to fit ``n_classes`` trees at\neach stage.\n\nParameters\n----------\nn_classes : int\n Number of classes." 
- }, - { - "name": "ExponentialLoss", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_classes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of classes." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "init_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the exponential loss\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights." - }, - { - "name": "negative_gradient", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." 
- }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw predictions (i.e. values from the tree leaves) of the tree ensemble at iteration ``i - 1``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the residual (= negative gradient).\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``." - }, - { - "name": "_update_terminal_region", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_raw_prediction_to_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_raw_prediction_to_decision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_init_raw_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Exponential loss function for binary classification.\n\nSame loss as AdaBoost.\n\nParameters\n----------\nn_classes : int\n Number of classes.\n\nReferences\n----------\nGreg Ridgeway, Generalized Boosted Models: A guide to the gbm package, 2007" - } - ], - "functions": [] - }, - { - "name": "sklearn.ensemble._iforest", - "imports": [ - "import numbers", - "import numpy as np", - "from scipy.sparse import issparse", - "from warnings import warn", - "from tree import ExtraTreeRegressor", - "from utils import check_random_state", - "from utils import check_array", - "from utils import gen_batches", - "from utils import get_chunk_n_rows", - "from utils.fixes import _joblib_parallel_args", - "from utils.validation import 
check_is_fitted", - "from utils.validation import _num_samples", - "from utils.validation import _deprecate_positional_args", - "from base import OutlierMixin", - "from _bagging import BaseBagging" - ], - "classes": [ - { - "name": "IsolationForest", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of base estimators in the ensemble." - }, - { - "name": "max_samples", - "type": "Union[float, int, Literal[\"auto\"]]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "The number of samples to draw from X to train each base estimator. - If int, then draw `max_samples` samples. - If float, then draw `max_samples * X.shape[0]` samples. - If \"auto\", then `max_samples=min(256, n_samples)`. If max_samples is larger than the number of samples provided, all samples will be used for all trees (no sampling)." - }, - { - "name": "contamination", - "type": "Union[Literal['auto'], float]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The amount of contamination of the data set, i.e. the proportion of outliers in the data set. Used when fitting to define the threshold on the scores of the samples. - If 'auto', the threshold is determined as in the original paper. - If float, the contamination should be in the range [0, 0.5]. .. versionchanged:: 0.22 The default value of ``contamination`` changed from 0.1 to ``'auto'``." - }, - { - "name": "max_features", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of features to draw from X to train each base estimator. - If int, then draw `max_features` features. 
- If float, then draw `max_features * X.shape[1]` features." - }, - { - "name": "bootstrap", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, individual trees are fit on random subsets of the training data sampled with replacement. If False, sampling without replacement is performed." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel for both :meth:`fit` and :meth:`predict`. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo-randomness of the selection of the feature and split values for each branching step and each tree in the forest. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity of the tree building process." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`the Glossary `. .. 
versionadded:: 0.21" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_parallel_args", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Use ``dtype=np.float32`` for maximum efficiency. Sparse matrices are also supported, use sparse ``csc_matrix`` for maximum efficiency." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Use ``dtype=np.float32`` for maximum\n efficiency. Sparse matrices are also supported, use sparse\n ``csc_matrix`` for maximum efficiency.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\nself : object\n Fitted estimator." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. 
Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict if a particular sample is an outlier or not.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n For each observation, tells whether or not (+1 or -1) it should\n be considered as an inlier according to the fitted model." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Average anomaly score of X of the base classifiers.\n\nThe anomaly score of an input sample is computed as\nthe mean anomaly score of the trees in the forest.\n\nThe measure of normality of an observation given a tree is the depth\nof the leaf containing this observation, which is equivalent to\nthe number of splittings required to isolate this point. In case of\nseveral observations n_left in the leaf, the average path length of\na n_left samples isolation tree is added.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nscores : ndarray of shape (n_samples,)\n The anomaly score of the input samples.\n The lower, the more abnormal. 
Negative scores represent outliers,\n positive scores represent inliers." - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Opposite of the anomaly score defined in the original paper.\n\nThe anomaly score of an input sample is computed as\nthe mean anomaly score of the trees in the forest.\n\nThe measure of normality of an observation given a tree is the depth\nof the leaf containing this observation, which is equivalent to\nthe number of splittings required to isolate this point. In case of\nseveral observations n_left in the leaf, the average path length of\na n_left samples isolation tree is added.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\nscores : ndarray of shape (n_samples,)\n The anomaly score of the input samples.\n The lower, the more abnormal." - }, - { - "name": "_compute_chunked_score_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_compute_score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data matrix." - }, - { - "name": "subsample_features", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether features should be subsampled." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the score of each samples in X going through the extra trees.\n\nParameters\n----------\nX : array-like or sparse matrix\n Data matrix.\n\nsubsample_features : bool\n Whether features should be subsampled." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Isolation Forest Algorithm.\n\nReturn the anomaly score of each sample using the IsolationForest algorithm\n\nThe IsolationForest 'isolates' observations by randomly selecting a feature\nand then randomly selecting a split value between the maximum and minimum\nvalues of the selected feature.\n\nSince recursive partitioning can be represented by a tree structure, the\nnumber of splittings required to isolate a sample is equivalent to the path\nlength from the root node to the terminating node.\n\nThis path length, averaged over a forest of such random trees, is a\nmeasure of normality and our decision function.\n\nRandom partitioning produces noticeably shorter paths for anomalies.\nHence, when a forest of random trees collectively produce shorter path\nlengths for particular samples, they are highly likely to be anomalies.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nn_estimators : int, default=100\n The number of base estimators in the ensemble.\n\nmax_samples : \"auto\", int or float, default=\"auto\"\n The number of samples to draw from X to train each base estimator.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples.\n - If \"auto\", then `max_samples=min(256, n_samples)`.\n\n If max_samples is larger than the number of samples provided,\n all samples will be used for all trees (no sampling).\n\ncontamination : 'auto' or float, default='auto'\n The amount of contamination of the data set, i.e. 
the proportion\n of outliers in the data set. Used when fitting to define the threshold\n on the scores of the samples.\n\n - If 'auto', the threshold is determined as in the\n original paper.\n - If float, the contamination should be in the range [0, 0.5].\n\n .. versionchanged:: 0.22\n The default value of ``contamination`` changed from 0.1\n to ``'auto'``.\n\nmax_features : int or float, default=1.0\n The number of features to draw from X to train each base estimator.\n\n - If int, then draw `max_features` features.\n - If float, then draw `max_features * X.shape[1]` features.\n\nbootstrap : bool, default=False\n If True, individual trees are fit on random subsets of the training\n data sampled with replacement. If False, sampling without replacement\n is performed.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for both :meth:`fit` and\n :meth:`predict`. ``None`` means 1 unless in a\n :obj:`joblib.parallel_backend` context. ``-1`` means using all\n processors. See :term:`Glossary ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo-randomness of the selection of the feature\n and split values for each branching step and each tree in the forest.\n\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nverbose : int, default=0\n Controls the verbosity of the tree building process.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. See :term:`the Glossary `.\n\n .. 
versionadded:: 0.21\n\nAttributes\n----------\nbase_estimator_ : ExtraTreeRegressor instance\n The child estimator template used to create the collection of\n fitted sub-estimators.\n\nestimators_ : list of ExtraTreeRegressor instances\n The collection of fitted sub-estimators.\n\nestimators_features_ : list of ndarray\n The subset of drawn features for each base estimator.\n\nestimators_samples_ : list of ndarray\n The subset of drawn samples (i.e., the in-bag samples) for each base\n estimator.\n\nmax_samples_ : int\n The actual number of samples.\n\noffset_ : float\n Offset used to define the decision function from the raw scores. We\n have the relation: ``decision_function = score_samples - offset_``.\n ``offset_`` is defined as follows. When the contamination parameter is\n set to \"auto\", the offset is equal to -0.5 as the scores of inliers are\n close to 0 and the scores of outliers are close to -1. When a\n contamination parameter different than \"auto\" is provided, the offset\n is defined in such a way we obtain the expected number of outliers\n (samples with decision function < 0) in training.\n\n .. versionadded:: 0.20\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nNotes\n-----\nThe implementation is based on an ensemble of ExtraTreeRegressor. The\nmaximum depth of each tree is set to ``ceil(log_2(n))`` where\n:math:`n` is the number of samples used to build the tree\n(see (Liu et al., 2008) for more details).\n\nReferences\n----------\n.. [1] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. \"Isolation forest.\"\n Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on.\n.. [2] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. 
\"Isolation-based\n anomaly detection.\" ACM Transactions on Knowledge Discovery from\n Data (TKDD) 6.1 (2012): 3.\n\nSee Also\n----------\nsklearn.covariance.EllipticEnvelope : An object for detecting outliers in a\n Gaussian distributed dataset.\nsklearn.svm.OneClassSVM : Unsupervised Outlier Detection.\n Estimate the support of a high-dimensional distribution.\n The implementation is based on libsvm.\nsklearn.neighbors.LocalOutlierFactor : Unsupervised Outlier Detection\n using Local Outlier Factor (LOF).\n\nExamples\n--------\n>>> from sklearn.ensemble import IsolationForest\n>>> X = [[-1.1], [0.3], [0.5], [100]]\n>>> clf = IsolationForest(random_state=0).fit(X)\n>>> clf.predict([[0.1], [0], [90]])\narray([ 1, 1, -1])" - } - ], - "functions": [ - { - "name": "_average_path_length", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The average path length in a n_samples iTree, which is equal to\nthe average path length of an unsuccessful BST search since the\nlatter has the same structure as an isolation tree.\nParameters\n----------\nn_samples_leaf : array-like of shape (n_samples,)\n The number of training samples in each test sample leaf, for\n each estimators.\n\nReturns\n-------\naverage_path_length : ndarray of shape (n_samples,)" - } - ] - }, - { - "name": "sklearn.ensemble._stacking", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "from copy import deepcopy", - "import numpy as np", - "from joblib import Parallel", - "import scipy.sparse as sparse", - "from base import clone", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import TransformerMixin", - "from base import is_classifier", - "from base import is_regressor", - "from exceptions import NotFittedError", - "from utils._estimator_html_repr import _VisualBlock", - "from _base import _fit_single_estimator", - "from _base import _BaseHeterogeneousEnsemble", - "from 
linear_model import LogisticRegression", - "from linear_model import RidgeCV", - "from model_selection import cross_val_predict", - "from model_selection import check_cv", - "from preprocessing import LabelEncoder", - "from utils import Bunch", - "from utils.metaestimators import if_delegate_has_method", - "from utils.multiclass import check_classification_targets", - "from utils.validation import check_is_fitted", - "from utils.validation import column_or_1d", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed" - ], - "classes": [ - { - "name": "_BaseStacking", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_clone_final_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_concatenate_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Concatenate the predictions of each first layer learner and\npossibly the input dataset `X`.\n\nIf `X` is sparse and `self.passthrough` is False, the output of\n`transform` will be dense (the predictions). If `X` is sparse\nand `self.passthrough` is True, the output of `transform` will\nbe sparse.\n\nThis helper is in charge of ensuring the predictions are 2D arrays and\nit will drop one of the probability column when using probabilities\nin the binary case. 
Indeed, the p(y|c=0) = 1 - p(y|c=1)" - }, - { - "name": "_method_name", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where `n_samples` is the number of samples and `n_features` is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Note that this is supported only if all underlying estimators support sample weights. .. versionchanged:: 0.23 when not None, `sample_weight` is passed to all underlying estimators" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,) or default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\n .. versionchanged:: 0.23\n when not None, `sample_weight` is passed to all underlying\n estimators\n\nReturns\n-------\nself : object" - }, - { - "name": "n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Number of features seen during :term:`fit`." 
- }, - { - "name": "_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Concatenate and return the predictions of the estimators." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "**predict_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to the `predict` called by the `final_estimator`. Note that this may be used to return uncertainties from some estimators with `return_std` or `return_cov`. Be aware that it will only accounts for uncertainty in the final estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict target for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n**predict_params : dict of str -> obj\n Parameters to the `predict` called by the `final_estimator`. Note\n that this may be used to return uncertainties from some estimators\n with `return_std` or `return_cov`. Be aware that it will only\n accounts for uncertainty in the final estimator.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,) or (n_samples, n_output)\n Predicted targets." - }, - { - "name": "_sk_visual_block_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for stacking method." 
- }, - { - "name": "StackingClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimators", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Base estimators which will be stacked together. Each element of the list is defined as a tuple of string (i.e. name) and an estimator instance. An estimator can be set to 'drop' using `set_params`." - }, - { - "name": "final_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A classifier which will be used to combine the base estimators. The default classifier is a :class:`~sklearn.linear_model.LogisticRegression`." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy used in `cross_val_predict` to train `final_estimator`. Possible inputs for cv are: * None, to use the default 5-fold cross validation, * integer, to specify the number of folds in a (Stratified) KFold, * An object to be used as a cross-validation generator, * An iterable yielding train, test splits. For integer/None inputs, if the estimator is a classifier and y is either binary or multiclass, :class:`~sklearn.model_selection.StratifiedKFold` is used. In all other cases, :class:`~sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. note:: A larger number of split will provide no benefits if the number of training samples is large enough. Indeed, the training time will increase. ``cv`` is not used for model evaluation but for prediction." 
- }, - { - "name": "stack_method", - "type": "Literal['auto', 'predict_proba', 'decision_function', 'predict']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Methods called for each base estimator. It can be: * if 'auto', it will try to invoke, for each estimator, `'predict_proba'`, `'decision_function'` or `'predict'` in that order. * otherwise, one of `'predict_proba'`, `'decision_function'` or `'predict'`. If the method is not implemented by the estimator, it will raise an error." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel all `estimators` `fit`. `None` means 1 unless in a `joblib.parallel_backend` context. -1 means using all processors. See Glossary for more details." - }, - { - "name": "passthrough", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When False, only the predictions of estimators will be used as training data for `final_estimator`. When True, the `final_estimator` is trained on the predictions as well as the original training data." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_final_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where `n_samples` is the number of samples and `n_features` is the number of features." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Note that this is supported only if all underlying estimators support sample weights." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\nReturns\n-------\nself : object" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "**predict_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to the `predict` called by the `final_estimator`. Note that this may be used to return uncertainties from some estimators with `return_std` or `return_cov`. Be aware that it will only accounts for uncertainty in the final estimator." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict target for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n**predict_params : dict of str -> obj\n Parameters to the `predict` called by the `final_estimator`. Note\n that this may be used to return uncertainties from some estimators\n with `return_std` or `return_cov`. Be aware that it will only\n accounts for uncertainty in the final estimator.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,) or (n_samples, n_output)\n Predicted targets." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X using\n`final_estimator_.predict_proba`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\nReturns\n-------\nprobabilities : ndarray of shape (n_samples, n_classes) or list of ndarray of shape (n_output,)\n The class probabilities of the input samples." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict decision function for samples in X using\n`final_estimator_.decision_function`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\nReturns\n-------\ndecisions : ndarray of shape (n_samples,), (n_samples, n_classes), or (n_samples, n_classes * (n_classes-1) / 2)\n The decision function computed the final estimator." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where `n_samples` is the number of samples and `n_features` is the number of features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return class labels or probabilities for X for each estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\nReturns\n-------\ny_preds : ndarray of shape (n_samples, n_estimators) or (n_samples, n_classes * n_estimators)\n Prediction outputs for each estimator." - }, - { - "name": "_sk_visual_block_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Stack of estimators with a final classifier.\n\nStacked generalization consists in stacking the output of individual\nestimator and use a classifier to compute the final prediction. 
Stacking\nallows to use the strength of each individual estimator by using their\noutput as input of a final estimator.\n\nNote that `estimators_` are fitted on the full `X` while `final_estimator_`\nis trained using cross-validated predictions of the base estimators using\n`cross_val_predict`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nestimators : list of (str, estimator)\n Base estimators which will be stacked together. Each element of the\n list is defined as a tuple of string (i.e. name) and an estimator\n instance. An estimator can be set to 'drop' using `set_params`.\n\nfinal_estimator : estimator, default=None\n A classifier which will be used to combine the base estimators.\n The default classifier is a\n :class:`~sklearn.linear_model.LogisticRegression`.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy used in\n `cross_val_predict` to train `final_estimator`. Possible inputs for\n cv are:\n\n * None, to use the default 5-fold cross validation,\n * integer, to specify the number of folds in a (Stratified) KFold,\n * An object to be used as a cross-validation generator,\n * An iterable yielding train, test splits.\n\n For integer/None inputs, if the estimator is a classifier and y is\n either binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used.\n In all other cases, :class:`~sklearn.model_selection.KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. note::\n A larger number of split will provide no benefits if the number\n of training samples is large enough. Indeed, the training time\n will increase. ``cv`` is not used for model evaluation but for\n prediction.\n\nstack_method : {'auto', 'predict_proba', 'decision_function', 'predict'}, default='auto'\n Methods called for each base estimator. 
It can be:\n\n * if 'auto', it will try to invoke, for each estimator,\n `'predict_proba'`, `'decision_function'` or `'predict'` in that\n order.\n * otherwise, one of `'predict_proba'`, `'decision_function'` or\n `'predict'`. If the method is not implemented by the estimator, it\n will raise an error.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel all `estimators` `fit`.\n `None` means 1 unless in a `joblib.parallel_backend` context. -1 means\n using all processors. See Glossary for more details.\n\npassthrough : bool, default=False\n When False, only the predictions of estimators will be used as\n training data for `final_estimator`. When True, the\n `final_estimator` is trained on the predictions as well as the\n original training data.\n\nverbose : int, default=0\n Verbosity level.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n Class labels.\n\nestimators_ : list of estimators\n The elements of the estimators parameter, having been fitted on the\n training data. If an estimator has been set to `'drop'`, it\n will not appear in `estimators_`.\n\nnamed_estimators_ : :class:`~sklearn.utils.Bunch`\n Attribute to access any fitted sub-estimators by name.\n\nfinal_estimator_ : estimator\n The classifier which predicts given the output of `estimators_`.\n\nstack_method_ : list of str\n The method used by each base estimator.\n\nNotes\n-----\nWhen `predict_proba` is used by each estimator (i.e. most of the time for\n`stack_method='auto'` or specifically for `stack_method='predict_proba'`),\nThe first column predicted by each estimator will be dropped in the case\nof a binary classification problem. Indeed, both feature will be perfectly\ncollinear.\n\nReferences\n----------\n.. [1] Wolpert, David H. 
\"Stacked generalization.\" Neural networks 5.2\n (1992): 241-259.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.svm import LinearSVC\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.ensemble import StackingClassifier\n>>> X, y = load_iris(return_X_y=True)\n>>> estimators = [\n... ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),\n... ('svr', make_pipeline(StandardScaler(),\n... LinearSVC(random_state=42)))\n... ]\n>>> clf = StackingClassifier(\n... estimators=estimators, final_estimator=LogisticRegression()\n... )\n>>> from sklearn.model_selection import train_test_split\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, stratify=y, random_state=42\n... )\n>>> clf.fit(X_train, y_train).score(X_test, y_test)\n0.9..." - }, - { - "name": "StackingRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimators", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Base estimators which will be stacked together. Each element of the list is defined as a tuple of string (i.e. name) and an estimator instance. An estimator can be set to 'drop' using `set_params`." - }, - { - "name": "final_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A regressor which will be used to combine the base estimators. The default regressor is a :class:`~sklearn.linear_model.RidgeCV`." 
- }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy used in `cross_val_predict` to train `final_estimator`. Possible inputs for cv are: * None, to use the default 5-fold cross validation, * integer, to specify the number of folds in a (Stratified) KFold, * An object to be used as a cross-validation generator, * An iterable yielding train, test splits. For integer/None inputs, if the estimator is a classifier and y is either binary or multiclass, :class:`~sklearn.model_selection.StratifiedKFold` is used. In all other cases, :class:`~sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. note:: A larger number of split will provide no benefits if the number of training samples is large enough. Indeed, the training time will increase. ``cv`` is not used for model evaluation but for prediction." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel for `fit` of all `estimators`. `None` means 1 unless in a `joblib.parallel_backend` context. -1 means using all processors. See Glossary for more details." - }, - { - "name": "passthrough", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When False, only the predictions of estimators will be used as training data for `final_estimator`. When True, the `final_estimator` is trained on the predictions as well as the original training data." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_final_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Note that this is supported only if all underlying estimators support sample weights." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\nReturns\n-------\nself : object" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where `n_samples` is the number of samples and `n_features` is the number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the predictions for X for each estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\nReturns\n-------\ny_preds : ndarray of shape (n_samples, n_estimators)\n Prediction outputs for each estimator." - }, - { - "name": "_sk_visual_block_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Stack of estimators with a final regressor.\n\nStacked generalization consists in stacking the output of individual\nestimator and use a regressor to compute the final prediction. Stacking\nallows to use the strength of each individual estimator by using their\noutput as input of a final estimator.\n\nNote that `estimators_` are fitted on the full `X` while `final_estimator_`\nis trained using cross-validated predictions of the base estimators using\n`cross_val_predict`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nestimators : list of (str, estimator)\n Base estimators which will be stacked together. Each element of the\n list is defined as a tuple of string (i.e. name) and an estimator\n instance. An estimator can be set to 'drop' using `set_params`.\n\nfinal_estimator : estimator, default=None\n A regressor which will be used to combine the base estimators.\n The default regressor is a :class:`~sklearn.linear_model.RidgeCV`.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy used in\n `cross_val_predict` to train `final_estimator`. 
Possible inputs for\n cv are:\n\n * None, to use the default 5-fold cross validation,\n * integer, to specify the number of folds in a (Stratified) KFold,\n * An object to be used as a cross-validation generator,\n * An iterable yielding train, test splits.\n\n For integer/None inputs, if the estimator is a classifier and y is\n either binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used.\n In all other cases, :class:`~sklearn.model_selection.KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. note::\n A larger number of split will provide no benefits if the number\n of training samples is large enough. Indeed, the training time\n will increase. ``cv`` is not used for model evaluation but for\n prediction.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for `fit` of all `estimators`.\n `None` means 1 unless in a `joblib.parallel_backend` context. -1 means\n using all processors. See Glossary for more details.\n\npassthrough : bool, default=False\n When False, only the predictions of estimators will be used as\n training data for `final_estimator`. When True, the\n `final_estimator` is trained on the predictions as well as the\n original training data.\n\nverbose : int, default=0\n Verbosity level.\n\nAttributes\n----------\nestimators_ : list of estimator\n The elements of the estimators parameter, having been fitted on the\n training data. If an estimator has been set to `'drop'`, it\n will not appear in `estimators_`.\n\nnamed_estimators_ : :class:`~sklearn.utils.Bunch`\n Attribute to access any fitted sub-estimators by name.\n\n\nfinal_estimator_ : estimator\n The regressor to stacked the base estimators fitted.\n\nReferences\n----------\n.. [1] Wolpert, David H. 
\"Stacked generalization.\" Neural networks 5.2\n (1992): 241-259.\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.linear_model import RidgeCV\n>>> from sklearn.svm import LinearSVR\n>>> from sklearn.ensemble import RandomForestRegressor\n>>> from sklearn.ensemble import StackingRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> estimators = [\n... ('lr', RidgeCV()),\n... ('svr', LinearSVR(random_state=42))\n... ]\n>>> reg = StackingRegressor(\n... estimators=estimators,\n... final_estimator=RandomForestRegressor(n_estimators=10,\n... random_state=42)\n... )\n>>> from sklearn.model_selection import train_test_split\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, random_state=42\n... )\n>>> reg.fit(X_train, y_train).score(X_test, y_test)\n0.3..." - } - ], - "functions": [] - }, - { - "name": "sklearn.ensemble._voting", - "imports": [ - "from abc import abstractmethod", - "import numpy as np", - "from joblib import Parallel", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import TransformerMixin", - "from base import clone", - "from _base import _fit_single_estimator", - "from _base import _BaseHeterogeneousEnsemble", - "from preprocessing import LabelEncoder", - "from utils import Bunch", - "from utils.validation import check_is_fitted", - "from utils.multiclass import check_classification_targets", - "from utils.validation import column_or_1d", - "from utils.validation import _deprecate_positional_args", - "from exceptions import NotFittedError", - "from utils._estimator_html_repr import _VisualBlock", - "from utils.fixes import delayed" - ], - "classes": [ - { - "name": "_BaseVoting", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_log_message", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_weights_not_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get the weights of not `None` estimators." - }, - { - "name": "_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Collect results from clf.predict calls." - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get common fit operations." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input samples" - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values (None for unsupervised transformations)." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional fit parameters." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Return class labels or probabilities for each estimator.\n\nReturn predictions for X for each estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix, dataframe} of shape (n_samples, n_features)\n Input samples\n\ny : ndarray of shape (n_samples,), default=None\n Target values (None for unsupervised transformations).\n\n**fit_params : dict\n Additional fit parameters.\n\nReturns\n-------\nX_new : ndarray array of shape (n_samples, n_features_new)\n Transformed array." 
- }, - { - "name": "n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sk_visual_block_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for voting.\n\nWarning: This class should not be used directly. Use derived classes\ninstead." - }, - { - "name": "VotingClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimators", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones of those original estimators that will be stored in the class attribute ``self.estimators_``. An estimator can be set to ``'drop'`` using ``set_params``. .. versionchanged:: 0.21 ``'drop'`` is accepted. Using None was deprecated in 0.22 and support was removed in 0.24." - }, - { - "name": "voting", - "type": "Literal['hard', 'soft']", - "hasDefault": true, - "default": "'hard'", - "limitation": null, - "ignored": false, - "docstring": "If 'hard', uses predicted class labels for majority rule voting. Else if 'soft', predicts the class label based on the argmax of the sums of the predicted probabilities, which is recommended for an ensemble of well-calibrated classifiers." - }, - { - "name": "weights", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sequence of weights (`float` or `int`) to weight the occurrences of predicted class labels (`hard` voting) or class probabilities before averaging (`soft` voting). Uses uniform weights if `None`." 
- }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel for ``fit``. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionadded:: 0.18" - }, - { - "name": "flatten_transform", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Affects shape of transform output only when voting='soft' If voting='soft' and flatten_transform=True, transform method returns matrix with shape (n_samples, n_classifiers * n_classes). If flatten_transform=False, it returns (n_classifiers, n_samples, n_classes)." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the time elapsed while fitting will be printed as it is completed. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Note that this is supported only if all underlying estimators support sample weights. .. 
versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nself : object" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class labels for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\nmaj : array-like of shape (n_samples,)\n Predicted class labels." - }, - { - "name": "_collect_probas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Collect results from clf.predict calls." - }, - { - "name": "_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X in 'soft' voting." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute probabilities of possible outcomes for samples in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\navg : array-like of shape (n_samples, n_classes)\n Weighted average probability for each class per sample." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Return class labels or probabilities for X for each estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\nReturns\n-------\nprobabilities_or_labels\n If `voting='soft'` and `flatten_transform=True`:\n returns ndarray of shape (n_classifiers, n_samples *\n n_classes), being class probabilities calculated by each\n classifier.\n If `voting='soft' and `flatten_transform=False`:\n ndarray of shape (n_classifiers, n_samples, n_classes)\n If `voting='hard'`:\n ndarray of shape (n_samples, n_classifiers), being\n class labels predicted by each classifier." - } - ], - "docstring": "Soft Voting/Majority Rule classifier for unfitted estimators.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nestimators : list of (str, estimator) tuples\n Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones\n of those original estimators that will be stored in the class attribute\n ``self.estimators_``. An estimator can be set to ``'drop'``\n using ``set_params``.\n\n .. versionchanged:: 0.21\n ``'drop'`` is accepted. 
Using None was deprecated in 0.22 and\n support was removed in 0.24.\n\nvoting : {'hard', 'soft'}, default='hard'\n If 'hard', uses predicted class labels for majority rule voting.\n Else if 'soft', predicts the class label based on the argmax of\n the sums of the predicted probabilities, which is recommended for\n an ensemble of well-calibrated classifiers.\n\nweights : array-like of shape (n_classifiers,), default=None\n Sequence of weights (`float` or `int`) to weight the occurrences of\n predicted class labels (`hard` voting) or class probabilities\n before averaging (`soft` voting). Uses uniform weights if `None`.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for ``fit``.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.18\n\nflatten_transform : bool, default=True\n Affects shape of transform output only when voting='soft'\n If voting='soft' and flatten_transform=True, transform method returns\n matrix with shape (n_samples, n_classifiers * n_classes). If\n flatten_transform=False, it returns\n (n_classifiers, n_samples, n_classes).\n\nverbose : bool, default=False\n If True, the time elapsed while fitting will be printed as it\n is completed.\n\n .. versionadded:: 0.23\n\nAttributes\n----------\nestimators_ : list of classifiers\n The collection of fitted sub-estimators as defined in ``estimators``\n that are not 'drop'.\n\nnamed_estimators_ : :class:`~sklearn.utils.Bunch`\n Attribute to access any fitted sub-estimators by name.\n\n .. 
versionadded:: 0.20\n\nclasses_ : array-like of shape (n_predictions,)\n The classes labels.\n\nSee Also\n--------\nVotingRegressor : Prediction voting regressor.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.naive_bayes import GaussianNB\n>>> from sklearn.ensemble import RandomForestClassifier, VotingClassifier\n>>> clf1 = LogisticRegression(multi_class='multinomial', random_state=1)\n>>> clf2 = RandomForestClassifier(n_estimators=50, random_state=1)\n>>> clf3 = GaussianNB()\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> eclf1 = VotingClassifier(estimators=[\n... ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')\n>>> eclf1 = eclf1.fit(X, y)\n>>> print(eclf1.predict(X))\n[1 1 1 2 2 2]\n>>> np.array_equal(eclf1.named_estimators_.lr.predict(X),\n... eclf1.named_estimators_['lr'].predict(X))\nTrue\n>>> eclf2 = VotingClassifier(estimators=[\n... ('lr', clf1), ('rf', clf2), ('gnb', clf3)],\n... voting='soft')\n>>> eclf2 = eclf2.fit(X, y)\n>>> print(eclf2.predict(X))\n[1 1 1 2 2 2]\n>>> eclf3 = VotingClassifier(estimators=[\n... ('lr', clf1), ('rf', clf2), ('gnb', clf3)],\n... voting='soft', weights=[2,1,1],\n... flatten_transform=True)\n>>> eclf3 = eclf3.fit(X, y)\n>>> print(eclf3.predict(X))\n[1 1 1 2 2 2]\n>>> print(eclf3.transform(X).shape)\n(6, 6)" - }, - { - "name": "VotingRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimators", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Invoking the ``fit`` method on the ``VotingRegressor`` will fit clones of those original estimators that will be stored in the class attribute ``self.estimators_``. An estimator can be set to ``'drop'`` using ``set_params``. .. 
versionchanged:: 0.21 ``'drop'`` is accepted. Using None was deprecated in 0.22 and support was removed in 0.24." - }, - { - "name": "weights", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sequence of weights (`float` or `int`) to weight the occurrences of predicted values before averaging. Uses uniform weights if `None`." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to run in parallel for ``fit``. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the time elapsed while fitting will be printed as it is completed. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Note that this is supported only if all underlying estimators support sample weights." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\nReturns\n-------\nself : object\n Fitted estimator." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict regression target for X.\n\nThe predicted regression target of an input sample is computed as the\nmean predicted regression targets of the estimators in the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted values." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Return predictions for X for each estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\npredictions: ndarray of shape (n_samples, n_classifiers)\n Values predicted by each regressor." 
- } - ], - "docstring": "Prediction voting regressor for unfitted estimators.\n\nA voting regressor is an ensemble meta-estimator that fits several base\nregressors, each on the whole dataset. Then it averages the individual\npredictions to form a final prediction.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\nParameters\n----------\nestimators : list of (str, estimator) tuples\n Invoking the ``fit`` method on the ``VotingRegressor`` will fit clones\n of those original estimators that will be stored in the class attribute\n ``self.estimators_``. An estimator can be set to ``'drop'`` using\n ``set_params``.\n\n .. versionchanged:: 0.21\n ``'drop'`` is accepted. Using None was deprecated in 0.22 and\n support was removed in 0.24.\n\nweights : array-like of shape (n_regressors,), default=None\n Sequence of weights (`float` or `int`) to weight the occurrences of\n predicted values before averaging. Uses uniform weights if `None`.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for ``fit``.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting will be printed as it\n is completed.\n\n .. versionadded:: 0.23\n\nAttributes\n----------\nestimators_ : list of regressors\n The collection of fitted sub-estimators as defined in ``estimators``\n that are not 'drop'.\n\nnamed_estimators_ : Bunch\n Attribute to access any fitted sub-estimators by name.\n\n .. 
versionadded:: 0.20\n\nSee Also\n--------\nVotingClassifier : Soft Voting/Majority Rule classifier.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LinearRegression\n>>> from sklearn.ensemble import RandomForestRegressor\n>>> from sklearn.ensemble import VotingRegressor\n>>> r1 = LinearRegression()\n>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)\n>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])\n>>> y = np.array([2, 6, 12, 20, 30, 42])\n>>> er = VotingRegressor([('lr', r1), ('rf', r2)])\n>>> print(er.fit(X, y).predict(X))\n[ 3.3 5.7 11.8 19.7 28. 40.3]" - } - ], - "functions": [] - }, - { - "name": "sklearn.ensemble._weight_boosting", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numpy as np", - "from scipy.special import xlogy", - "from _base import BaseEnsemble", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import is_classifier", - "from base import is_regressor", - "from tree import DecisionTreeClassifier", - "from tree import DecisionTreeRegressor", - "from utils import check_array", - "from utils import check_random_state", - "from utils import _safe_indexing", - "from utils.extmath import softmax", - "from utils.extmath import stable_cumsum", - "from metrics import accuracy_score", - "from metrics import r2_score", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.validation import has_fit_parameter", - "from utils.validation import _num_samples", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "BaseWeightBoosting", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_X", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels in classification, real numbers in regression)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, the sample weights are initialized to 1 / n_samples." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a boosted classifier/regressor from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, the sample weights are initialized to\n 1 / n_samples.\n\nReturns\n-------\nself : object" - }, - { - "name": "_boost", - "decorators": [], - "parameters": [ - { - "name": "iboost", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index of the current boost iteration." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. 
Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The current sample weights." - }, - { - "name": "random_state", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The current random number generator" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Implement a single boost.\n\nWarning: This method needs to be overridden by subclasses.\n\nParameters\n----------\niboost : int\n The index of the current boost iteration.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n The target values (class labels).\n\nsample_weight : array-like of shape (n_samples,)\n The current sample weights.\n\nrandom_state : RandomState\n The current random number generator\n\nReturns\n-------\nsample_weight : array-like of shape (n_samples,) or None\n The reweighted sample weights.\n If None then boosting has terminated early.\n\nestimator_weight : float\n The weight for the current boost.\n If None then boosting has terminated early.\n\nerror : float\n The classification error for the current boost.\n If None then boosting has terminated early." - }, - { - "name": "staged_score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. 
COO, DOK, and LIL are converted to CSR." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Labels for X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return staged scores for X, y.\n\nThis generator method yields the ensemble score after each iteration of\nboosting and therefore allows monitoring, such as to determine the\nscore on a test set after each boost.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n Labels for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nYields\n------\nz : float" - }, - { - "name": "feature_importances_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The impurity-based feature importances.\n\nThe higher, the more important the feature.\nThe importance of a feature is computed as the (normalized)\ntotal reduction of the criterion brought by that feature. It is also\nknown as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). See\n:func:`sklearn.inspection.permutation_importance` as an alternative.\n\nReturns\n-------\nfeature_importances_ : ndarray of shape (n_features,)\n The feature importances." - } - ], - "docstring": "Base class for AdaBoost estimators.\n\nWarning: This class should not be used directly. Use derived classes\ninstead." 
- }, - { - "name": "AdaBoostClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base estimator from which the boosted ensemble is built. Support for sample weighting is required, as well as proper ``classes_`` and ``n_classes_`` attributes. If ``None``, then the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier` initialized with `max_depth=1`." - }, - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "50", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of estimators at which boosting is terminated. In case of perfect fit, the learning procedure is stopped early." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Learning rate shrinks the contribution of each classifier by ``learning_rate``. There is a trade-off between ``learning_rate`` and ``n_estimators``." - }, - { - "name": "algorithm", - "type": "Literal['SAMME', 'SAMME.R']", - "hasDefault": true, - "default": "'SAMME", - "limitation": null, - "ignored": false, - "docstring": "If 'SAMME.R' then use the SAMME.R real boosting algorithm. ``base_estimator`` must support calculation of class probabilities. If 'SAMME' then use the SAMME discrete boosting algorithm. The SAMME.R algorithm typically converges faster than SAMME, achieving a lower test error with fewer boosting iterations." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the random seed given at each `base_estimator` at each boosting iteration. Thus, it is only used when `base_estimator` exposes a `random_state`. 
Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, the sample weights are initialized to ``1 / n_samples``." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a boosted classifier from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n The target values (class labels).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, the sample weights are initialized to\n ``1 / n_samples``.\n\nReturns\n-------\nself : object\n Fitted estimator." - }, - { - "name": "_validate_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the estimator and set the base_estimator_ attribute." 
- }, - { - "name": "_boost", - "decorators": [], - "parameters": [ - { - "name": "iboost", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index of the current boost iteration." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The current sample weights." - }, - { - "name": "random_state", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The RandomState instance used if the base estimator accepts a `random_state` attribute." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Implement a single boost.\n\nPerform a single boost according to the real multi-class SAMME.R\nalgorithm or to the discrete SAMME algorithm and return the updated\nsample weights.\n\nParameters\n----------\niboost : int\n The index of the current boost iteration.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,)\n The target values (class labels).\n\nsample_weight : array-like of shape (n_samples,)\n The current sample weights.\n\nrandom_state : RandomState instance\n The RandomState instance used if the base estimator accepts a\n `random_state` attribute.\n\nReturns\n-------\nsample_weight : array-like of shape (n_samples,) or None\n The reweighted sample weights.\n If None then boosting has terminated early.\n\nestimator_weight : float\n The weight for the current boost.\n If None then boosting has terminated early.\n\nestimator_error : float\n The classification error for the current boost.\n If None then boosting has terminated early." - }, - { - "name": "_boost_real", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Implement a single boost using the SAMME.R real algorithm." - }, - { - "name": "_boost_discrete", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Implement a single boost using the SAMME discrete algorithm." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict classes for X.\n\nThe predicted class of an input sample is computed as the weighted mean\nprediction of the classifiers in the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted classes." - }, - { - "name": "staged_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return staged predictions for X.\n\nThe predicted class of an input sample is computed as the weighted mean\nprediction of the classifiers in the ensemble.\n\nThis generator method yields the ensemble prediction after each\niteration of boosting and therefore allows monitoring, such as to\ndetermine the prediction on a test set after each boost.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nYields\n------\ny : generator of ndarray of shape (n_samples,)\n The predicted classes." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the decision function of ``X``.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nReturns\n-------\nscore : ndarray of shape of (n_samples, k)\n The decision function of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute.\n Binary classification is a special cases with ``k == 1``,\n otherwise ``k==n_classes``. For binary classification,\n values closer to -1 or 1 mean more like the first or second\n class in ``classes_``, respectively." - }, - { - "name": "staged_decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute decision function of ``X`` for each boosting iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each boosting iteration.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nYields\n------\nscore : generator of ndarray of shape (n_samples, k)\n The decision function of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute.\n Binary classification is a special cases with ``k == 1``,\n otherwise ``k==n_classes``. For binary classification,\n values closer to -1 or 1 mean more like the first or second\n class in ``classes_``, respectively." 
- }, - { - "name": "_compute_proba_from_decision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute probabilities from the decision function.\n\nThis is based eq. (4) of [1] where:\n p(y=c|X) = exp((1 / K-1) f_c(X)) / sum_k(exp((1 / K-1) f_k(X)))\n = softmax((1 / K-1) * f(X))\n\nReferences\n----------\n.. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\",\n 2009." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample is computed as\nthe weighted mean predicted class probabilities of the classifiers\nin the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute." - }, - { - "name": "staged_predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample is computed as\nthe weighted mean predicted class probabilities of the classifiers\nin the ensemble.\n\nThis generator method yields the ensemble predicted class probabilities\nafter each iteration of boosting and therefore allows monitoring, such\nas to determine the predicted class probabilities on a test set after\neach boost.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nYields\n-------\np : generator of ndarray of shape (n_samples,)\n The class probabilities of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class log-probabilities for X.\n\nThe predicted class log-probabilities of an input sample is computed as\nthe weighted mean predicted class log-probabilities of the classifiers\nin the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute." 
- } - ], - "docstring": "An AdaBoost classifier.\n\nAn AdaBoost [1] classifier is a meta-estimator that begins by fitting a\nclassifier on the original dataset and then fits additional copies of the\nclassifier on the same dataset but where the weights of incorrectly\nclassified instances are adjusted such that subsequent classifiers focus\nmore on difficult cases.\n\nThis class implements the algorithm known as AdaBoost-SAMME [2].\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.14\n\nParameters\n----------\nbase_estimator : object, default=None\n The base estimator from which the boosted ensemble is built.\n Support for sample weighting is required, as well as proper\n ``classes_`` and ``n_classes_`` attributes. If ``None``, then\n the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`\n initialized with `max_depth=1`.\n\nn_estimators : int, default=50\n The maximum number of estimators at which boosting is terminated.\n In case of perfect fit, the learning procedure is stopped early.\n\nlearning_rate : float, default=1.\n Learning rate shrinks the contribution of each classifier by\n ``learning_rate``. 
There is a trade-off between ``learning_rate`` and\n ``n_estimators``.\n\nalgorithm : {'SAMME', 'SAMME.R'}, default='SAMME.R'\n If 'SAMME.R' then use the SAMME.R real boosting algorithm.\n ``base_estimator`` must support calculation of class probabilities.\n If 'SAMME' then use the SAMME discrete boosting algorithm.\n The SAMME.R algorithm typically converges faster than SAMME,\n achieving a lower test error with fewer boosting iterations.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random seed given at each `base_estimator` at each\n boosting iteration.\n Thus, it is only used when `base_estimator` exposes a `random_state`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nbase_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\nestimators_ : list of classifiers\n The collection of fitted sub-estimators.\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\nn_classes_ : int\n The number of classes.\n\nestimator_weights_ : ndarray of floats\n Weights for each estimator in the boosted ensemble.\n\nestimator_errors_ : ndarray of floats\n Classification error for each estimator in the boosted\n ensemble.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances if supported by the\n ``base_estimator`` (when based on decision trees).\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). 
See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nSee Also\n--------\nAdaBoostRegressor : An AdaBoost regressor that begins by fitting a\n regressor on the original dataset and then fits additional copies of\n the regressor on the same dataset but where the weights of instances\n are adjusted according to the error of the current prediction.\n\nGradientBoostingClassifier : GB builds an additive model in a forward\n stage-wise fashion. Regression trees are fit on the negative gradient\n of the binomial or multinomial deviance loss function. Binary\n classification is a special case where only a single regression tree is\n induced.\n\nsklearn.tree.DecisionTreeClassifier : A non-parametric supervised learning\n method used for classification.\n Creates a model that predicts the value of a target variable by\n learning simple decision rules inferred from the data features.\n\nReferences\n----------\n.. [1] Y. Freund, R. Schapire, \"A Decision-Theoretic Generalization of\n on-Line Learning and an Application to Boosting\", 1995.\n\n.. [2] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009.\n\nExamples\n--------\n>>> from sklearn.ensemble import AdaBoostClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=1000, n_features=4,\n... n_informative=2, n_redundant=0,\n... random_state=0, shuffle=False)\n>>> clf = AdaBoostClassifier(n_estimators=100, random_state=0)\n>>> clf.fit(X, y)\nAdaBoostClassifier(n_estimators=100, random_state=0)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])\n>>> clf.score(X, y)\n0.983..." - }, - { - "name": "AdaBoostRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base estimator from which the boosted ensemble is built. 
If ``None``, then the base estimator is :class:`~sklearn.tree.DecisionTreeRegressor` initialized with `max_depth=3`." - }, - { - "name": "n_estimators", - "type": "int", - "hasDefault": true, - "default": "50", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of estimators at which boosting is terminated. In case of perfect fit, the learning procedure is stopped early." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Learning rate shrinks the contribution of each regressor by ``learning_rate``. There is a trade-off between ``learning_rate`` and ``n_estimators``." - }, - { - "name": "loss", - "type": "Literal['linear', 'square', 'exponential']", - "hasDefault": true, - "default": "'linear'", - "limitation": null, - "ignored": false, - "docstring": "The loss function to use when updating the weights after each boosting iteration." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the random seed given at each `base_estimator` at each boosting iteration. Thus, it is only used when `base_estimator` exposes a `random_state`. In addition, it controls the bootstrap of the weights used to train the `base_estimator` at each boosting iteration. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (real numbers)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, the sample weights are initialized to 1 / n_samples." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a boosted regressor from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n The target values (real numbers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, the sample weights are initialized to\n 1 / n_samples.\n\nReturns\n-------\nself : object" - }, - { - "name": "_validate_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the estimator and set the base_estimator_ attribute." - }, - { - "name": "_boost", - "decorators": [], - "parameters": [ - { - "name": "iboost", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index of the current boost iteration." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels in classification, real numbers in regression)." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The current sample weights." - }, - { - "name": "random_state", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The RandomState instance used if the base estimator accepts a `random_state` attribute. Controls also the bootstrap of the weights used to train the weak learner. replacement." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Implement a single boost for regression\n\nPerform a single boost according to the AdaBoost.R2 algorithm and\nreturn the updated sample weights.\n\nParameters\n----------\niboost : int\n The index of the current boost iteration.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\nsample_weight : array-like of shape (n_samples,)\n The current sample weights.\n\nrandom_state : RandomState\n The RandomState instance used if the base estimator accepts a\n `random_state` attribute.\n Controls also the bootstrap of the weights used to train the weak\n learner.\n replacement.\n\nReturns\n-------\nsample_weight : array-like of shape (n_samples,) or None\n The reweighted sample weights.\n If None then boosting has terminated early.\n\nestimator_weight : float\n The weight for the current boost.\n If None then boosting has terminated early.\n\nestimator_error : float\n The regression error for the current boost.\n If None then boosting has terminated early." 
- }, - { - "name": "_get_median_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict regression value for X.\n\nThe predicted regression value of an input sample is computed\nas the weighted median prediction of the classifiers in the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted regression values." - }, - { - "name": "staged_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return staged predictions for X.\n\nThe predicted regression value of an input sample is computed\nas the weighted median prediction of the classifiers in the ensemble.\n\nThis generator method yields the ensemble prediction after each\niteration of boosting and therefore allows monitoring, such as to\ndetermine the prediction on a test set after each boost.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\nYields\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted regression values." 
- } - ], - "docstring": "An AdaBoost regressor.\n\nAn AdaBoost [1] regressor is a meta-estimator that begins by fitting a\nregressor on the original dataset and then fits additional copies of the\nregressor on the same dataset but where the weights of instances are\nadjusted according to the error of the current prediction. As such,\nsubsequent regressors focus more on difficult cases.\n\nThis class implements the algorithm known as AdaBoost.R2 [2].\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.14\n\nParameters\n----------\nbase_estimator : object, default=None\n The base estimator from which the boosted ensemble is built.\n If ``None``, then the base estimator is\n :class:`~sklearn.tree.DecisionTreeRegressor` initialized with\n `max_depth=3`.\n\nn_estimators : int, default=50\n The maximum number of estimators at which boosting is terminated.\n In case of perfect fit, the learning procedure is stopped early.\n\nlearning_rate : float, default=1.\n Learning rate shrinks the contribution of each regressor by\n ``learning_rate``. 
There is a trade-off between ``learning_rate`` and\n ``n_estimators``.\n\nloss : {'linear', 'square', 'exponential'}, default='linear'\n The loss function to use when updating the weights after each\n boosting iteration.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random seed given at each `base_estimator` at each\n boosting iteration.\n Thus, it is only used when `base_estimator` exposes a `random_state`.\n In addition, it controls the bootstrap of the weights used to train the\n `base_estimator` at each boosting iteration.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nbase_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\nestimators_ : list of classifiers\n The collection of fitted sub-estimators.\n\nestimator_weights_ : ndarray of floats\n Weights for each estimator in the boosted ensemble.\n\nestimator_errors_ : ndarray of floats\n Regression error for each estimator in the boosted ensemble.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances if supported by the\n ``base_estimator`` (when based on decision trees).\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nExamples\n--------\n>>> from sklearn.ensemble import AdaBoostRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=4, n_informative=2,\n... random_state=0, shuffle=False)\n>>> regr = AdaBoostRegressor(random_state=0, n_estimators=100)\n>>> regr.fit(X, y)\nAdaBoostRegressor(n_estimators=100, random_state=0)\n>>> regr.predict([[0, 0, 0, 0]])\narray([4.7972...])\n>>> regr.score(X, y)\n0.9771...\n\nSee Also\n--------\nAdaBoostClassifier, GradientBoostingRegressor,\nsklearn.tree.DecisionTreeRegressor\n\nReferences\n----------\n.. 
[1] Y. Freund, R. Schapire, \"A Decision-Theoretic Generalization of\n on-Line Learning and an Application to Boosting\", 1995.\n\n.. [2] H. Drucker, \"Improving Regressors using Boosting Techniques\", 1997." - } - ], - "functions": [ - { - "name": "_samme_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate algorithm 4, step 2, equation c) of Zhu et al [1].\n\nReferences\n----------\n.. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009." - } - ] - }, - { - "name": "sklearn.ensemble", - "imports": [ - "import typing", - "from _base import BaseEnsemble", - "from _forest import RandomForestClassifier", - "from _forest import RandomForestRegressor", - "from _forest import RandomTreesEmbedding", - "from _forest import ExtraTreesClassifier", - "from _forest import ExtraTreesRegressor", - "from _bagging import BaggingClassifier", - "from _bagging import BaggingRegressor", - "from _iforest import IsolationForest", - "from _weight_boosting import AdaBoostClassifier", - "from _weight_boosting import AdaBoostRegressor", - "from _gb import GradientBoostingClassifier", - "from _gb import GradientBoostingRegressor", - "from _voting import VotingClassifier", - "from _voting import VotingRegressor", - "from _stacking import StackingClassifier", - "from _stacking import StackingRegressor", - "from _hist_gradient_boosting.gradient_boosting import HistGradientBoostingRegressor", - "from _hist_gradient_boosting.gradient_boosting import HistGradientBoostingClassifier" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.ensemble.tests.test_bagging", - "imports": [ - "from itertools import product", - "import numpy as np", - "import joblib", - "import pytest", - "from sklearn.base import BaseEstimator", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raises", 
- "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.dummy import DummyClassifier", - "from sklearn.dummy import DummyRegressor", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.model_selection import ParameterGrid", - "from sklearn.ensemble import BaggingClassifier", - "from sklearn.ensemble import BaggingRegressor", - "from sklearn.linear_model import Perceptron", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.neighbors import KNeighborsClassifier", - "from sklearn.neighbors import KNeighborsRegressor", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.svm import SVC", - "from sklearn.svm import SVR", - "from sklearn.random_projection import SparseRandomProjection", - "from sklearn.pipeline import make_pipeline", - "from sklearn.feature_selection import SelectKBest", - "from sklearn.model_selection import train_test_split", - "from sklearn.datasets import load_diabetes", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_hastie_10_2", - "from sklearn.utils import check_random_state", - "from sklearn.preprocessing import FunctionTransformer", - "from sklearn.preprocessing import scale", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix" - ], - "classes": [ - { - "name": "DummySizeEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "DummyZeroEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": 
"__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bootstrap_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bootstrap_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_probability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_score_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_score_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_single_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parallel_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parallel_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bagging_with_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bagging_sample_weight_unsupported_but_passed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_smaller_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_equal_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_equivalence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_with_oob_score_fails", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_score_removed_on_warm_start", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_score_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimators_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimators_samples_deterministic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_samples_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_oob_score_label_encoding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "replace", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bagging_regressor_with_missing_inputs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bagging_classifier_with_missing_inputs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bagging_small_max_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bagging_get_estimators_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_base", - "imports": [ - "import numpy as np", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.datasets import load_iris", - "from sklearn.ensemble import BaggingClassifier", - "from sklearn.ensemble._base import 
_set_random_states", - "from sklearn.linear_model import Perceptron", - "from collections import OrderedDict", - "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis", - "from sklearn.pipeline import Pipeline", - "from sklearn.feature_selection import SelectFromModel" - ], - "classes": [], - "functions": [ - { - "name": "test_base", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_zero_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_not_int_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_random_states", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_common", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.base import clone", - "from sklearn.base import ClassifierMixin", - "from sklearn.base import is_classifier", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import load_diabetes", - "from sklearn.impute import SimpleImputer", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import LinearRegression", - "from sklearn.svm import LinearSVC", - "from sklearn.svm import LinearSVR", - "from sklearn.svm import SVC", - "from sklearn.svm import SVR", - "from sklearn.pipeline import make_pipeline", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.ensemble import RandomForestRegressor", - "from sklearn.ensemble import StackingClassifier", - "from sklearn.ensemble import StackingRegressor", - "from sklearn.ensemble import VotingClassifier", 
- "from sklearn.ensemble import VotingRegressor" - ], - "classes": [], - "functions": [ - { - "name": "test_ensemble_heterogeneous_estimators_behavior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ensemble_heterogeneous_estimators_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ensemble_heterogeneous_estimators_name_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ensemble_heterogeneous_estimators_all_dropped", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_heterogeneous_ensemble_support_missing_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_forest", - "imports": [ - "import pickle", - "import math", - "from collections import defaultdict", - "import itertools", - "from itertools import combinations", - "from itertools import product", - "from typing import Dict", - "from typing import Any", - "import numpy as np", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import coo_matrix", - "from scipy.special import comb", - "import pytest", - "import joblib", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import skip_if_no_parallel", - "from 
sklearn.utils.fixes import parse_version", - "from sklearn.exceptions import NotFittedError", - "from sklearn import datasets", - "from sklearn.decomposition import TruncatedSVD", - "from sklearn.datasets import make_classification", - "from sklearn.ensemble import ExtraTreesClassifier", - "from sklearn.ensemble import ExtraTreesRegressor", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.ensemble import RandomForestRegressor", - "from sklearn.ensemble import RandomTreesEmbedding", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.svm import LinearSVC", - "from sklearn.utils.validation import check_random_state", - "from sklearn.tree._classes import SPARSE_SPLITTERS" - ], - "classes": [ - { - "name": "MyBackend", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "start_call", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "check_classification_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check classification on a toy dataset." 
- }, - { - "name": "test_classification_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_iris_criterion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_regression_criterion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_regressor_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_probability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_probability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_importances_asymptotic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unfitted_feature_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "check_oob_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_score_classifiers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_score_regressors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_oob_score_raise_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_score_raise_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check parallel computations in classification" - }, - { - "name": "test_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_multioutput_string", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_classes_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classes_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_trees_dense_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_trees_dense_equal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_hasher", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_hasher_sparse_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parallel_train", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_distribution", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_max_leaf_nodes_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_leaf_nodes_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_min_samples_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_samples_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "check_min_samples_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_samples_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_min_weight_fraction_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_weight_fraction_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_memory_layout", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_memory_layout", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_1d_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_1d_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_class_weight_balanced_and_bootstrap_multi_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_class_weight_balanced_and_bootstrap_multi_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_class_weight_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_weight_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_warm_start_clear", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_clear", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_warm_start_smaller_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_smaller_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_warm_start_equal_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_equal_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_warm_start_oob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_oob", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dtype_convert", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_decision_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decision_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_impurity_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_impurity_decrease", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_backend_respected", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_forest_feature_importances_sum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_forest_degenerate_feature_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_samples_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_forest_y_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_little_tree_with_small_max_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_gradient_boosting", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy.sparse import 
csr_matrix", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import coo_matrix", - "from scipy.special import expit", - "import pytest", - "from sklearn import datasets", - "from sklearn.base import clone", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.ensemble import GradientBoostingClassifier", - "from sklearn.ensemble import GradientBoostingRegressor", - "from sklearn.ensemble._gradient_boosting import predict_stages", - "from sklearn.preprocessing import OneHotEncoder", - "from sklearn.preprocessing import scale", - "from sklearn.svm import LinearSVC", - "from sklearn.metrics import mean_squared_error", - "from sklearn.model_selection import train_test_split", - "from sklearn.utils import check_random_state", - "from sklearn.utils import tosequence", - "from sklearn.utils._mocking import NoSampleWeightWrapper", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import skip_if_32bit", - "from sklearn.exceptions import DataConversionWarning", - "from sklearn.exceptions import NotFittedError", - "from sklearn.dummy import DummyClassifier", - "from sklearn.dummy import DummyRegressor", - "from sklearn.pipeline import make_pipeline", - "from sklearn.linear_model import LinearRegression", - "from sklearn.svm import NuSVR", - "import cPickle as pickle", - "import pickle", - "from io import StringIO", - "import sys", - "from sklearn.tree._tree import TREE_LEAF" - ], - "classes": [], - "functions": [ - { - "name": "test_classification_toy", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gbdt_parameter_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gbdt_loss_alpha_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_type_loss_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_synthetic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_synthetic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_probability_log", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_single_class_with_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_inputs_predict_stages", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_feature_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_feature_importance_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that Gini importance is calculated correctly.\n\nThis test follows the example from [1]_ (pg. 373).\n\n.. [1] Friedman, J., Hastie, T., & Tibshirani, R. (2001). The elements\n of statistical learning. New York: Springer series in statistics." - }, - { - "name": "test_max_feature_auto", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_staged_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_staged_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_staged_functions_defensive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_serialization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_degenerate_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_symbol_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_float_class_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shape_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mem_layout", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_improvement", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_improvement_raise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oob_multilcass_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_verbose_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_more_verbose_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_clear", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_zero_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_smaller_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_equal_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_warm_start_oob_switch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_oob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_fortran", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "early_stopping_monitor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns True on the 10th iteration. " - }, - { - "name": "test_monitor_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_complete_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_complete_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_estimator_reg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_estimator_clf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_leaf_nodes_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_impurity_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_impurity_decrease", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_wo_nestimators_change", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_probability_exponential", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_uniform_weights_toy_edge_case_reg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_uniform_weights_toy_edge_case_clf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_boosting_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_boosting_validation_fraction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping_stratified", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_boosting_with_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_boosting_with_init_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_boosting_init_wrong_methods", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping_n_classes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gbr_degenerate_feature_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gbr_deprecated_attr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_attr_error_raised_if_not_fitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_criterion_mae_deprecation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_gradient_boosting_loss_functions", - "imports": [ - "from itertools import product", - "import numpy as np", - "from numpy.testing import assert_allclose", - "import pytest", - "from pytest import approx", - "from sklearn.utils import check_random_state", - "from sklearn.ensemble._gb_losses import RegressionLossFunction", - "from sklearn.ensemble._gb_losses import LeastSquaresError", - "from sklearn.ensemble._gb_losses import LeastAbsoluteError", - "from sklearn.ensemble._gb_losses import HuberLossFunction", - "from sklearn.ensemble._gb_losses import QuantileLossFunction", - "from sklearn.ensemble._gb_losses import BinomialDeviance", - "from sklearn.ensemble._gb_losses import MultinomialDeviance", - "from sklearn.ensemble._gb_losses import ExponentialLoss", - "from sklearn.ensemble._gb_losses import LOSS_FUNCTIONS" - ], - "classes": [], - "functions": [ - { - "name": "test_binomial_deviance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight_smoke", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight_init_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_loss_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight_deviance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_deviance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mdl_computation_weighted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mdl_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_raw_predictions_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_raw_predictions_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lad_equals_quantile_50", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_iforest", - "imports": [ - "import pytest", - "import numpy as np", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import 
assert_allclose", - "from sklearn.model_selection import ParameterGrid", - "from sklearn.ensemble import IsolationForest", - "from sklearn.ensemble._iforest import _average_path_length", - "from sklearn.model_selection import train_test_split", - "from sklearn.datasets import load_diabetes", - "from sklearn.datasets import load_iris", - "from sklearn.utils import check_random_state", - "from sklearn.metrics import roc_auc_score", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from unittest.mock import Mock", - "from unittest.mock import patch" - ], - "classes": [], - "functions": [ - { - "name": "test_iforest", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check Isolation Forest for various parameter settings." - }, - { - "name": "test_iforest_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check IForest for various parameter settings on sparse input." - }, - { - "name": "test_iforest_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that it gives proper exception on deficient input." - }, - { - "name": "test_recalculate_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check max_depth recalculation when max_samples is reset to n_samples" - }, - { - "name": "test_max_samples_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iforest_parallel_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check parallel regression." 
- }, - { - "name": "test_iforest_performance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test Isolation Forest performs well" - }, - { - "name": "test_iforest_works", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_samples_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iforest_subsampled_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iforest_average_path_length", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iforest_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test iterative addition of iTrees to an iForest " - }, - { - "name": "test_iforest_chunks_works1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iforest_chunks_works2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iforest_with_uniform_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test whether iforest predicts inliers when using uniform data" - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_stacking", - "imports": [ - "import pytest", - "import numpy as np", - "import scipy.sparse as sparse", - "from sklearn.base import BaseEstimator", - "from sklearn.base import ClassifierMixin", - "from sklearn.base import 
RegressorMixin", - "from sklearn.base import clone", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import load_diabetes", - "from sklearn.datasets import load_breast_cancer", - "from sklearn.datasets import make_regression", - "from sklearn.datasets import make_classification", - "from sklearn.dummy import DummyClassifier", - "from sklearn.dummy import DummyRegressor", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import LinearRegression", - "from sklearn.svm import LinearSVC", - "from sklearn.svm import LinearSVR", - "from sklearn.svm import SVC", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.ensemble import RandomForestRegressor", - "from sklearn.preprocessing import scale", - "from sklearn.ensemble import StackingClassifier", - "from sklearn.ensemble import StackingRegressor", - "from sklearn.model_selection import train_test_split", - "from sklearn.model_selection import StratifiedKFold", - "from sklearn.model_selection import KFold", - "from sklearn.utils._mocking import CheckingClassifier", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import ignore_warnings" - ], - "classes": [ - { - "name": "NoWeightRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoWeightClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": 
"__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_stacking_classifier_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_classifier_drop_column_binary_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_classifier_drop_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_regressor_drop_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_regressor_diabetes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_regressor_sparse_passthrough", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_classifier_sparse_passthrough", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_classifier_drop_binary_prob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_classifier_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_regressor_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_stacking_randomness", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_classifier_stratify_default", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_with_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_classifier_sample_weight_fit_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_cv_influence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_without_n_features_in", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_voting", - "imports": [ - "import warnings", - "import pytest", - "import re", - "import numpy as np", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.exceptions import NotFittedError", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.naive_bayes import GaussianNB", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.ensemble import RandomForestRegressor", - "from sklearn.ensemble import VotingClassifier", - "from sklearn.ensemble import VotingRegressor", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.model_selection import GridSearchCV", - "from 
sklearn import datasets", - "from sklearn.model_selection import cross_val_score", - "from sklearn.model_selection import train_test_split", - "from sklearn.datasets import make_multilabel_classification", - "from sklearn.svm import SVC", - "from sklearn.multiclass import OneVsRestClassifier", - "from sklearn.neighbors import KNeighborsClassifier", - "from sklearn.base import BaseEstimator", - "from sklearn.base import ClassifierMixin", - "from sklearn.base import clone", - "from sklearn.dummy import DummyRegressor" - ], - "classes": [], - "functions": [ - { - "name": "test_voting_classifier_estimator_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predictproba_hardvoting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_notfitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_majority_label_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check classification by majority label on dataset iris." - }, - { - "name": "test_tie_situation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check voting classifier selects smaller class label in tie situation." - }, - { - "name": "test_weights_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check classification by average probabilities on dataset iris." - }, - { - "name": "test_weights_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check weighted average regression prediction on diabetes dataset." 
- }, - { - "name": "test_predict_on_toy_problem", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Manually check predicted class labels for toy dataset." - }, - { - "name": "test_predict_proba_on_toy_problem", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate predicted probabilities on toy dataset." - }, - { - "name": "test_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if error is raised for multilabel classification." - }, - { - "name": "test_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check GridSearch support." - }, - { - "name": "test_parallel_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check parallel backend of VotingClassifier on toy dataset." - }, - { - "name": "test_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Tests sample_weight parameter of VotingClassifier" - }, - { - "name": "test_sample_weight_kwargs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that VotingClassifier passes sample_weight as kwargs" - }, - { - "name": "test_voting_classifier_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_estimator_drop", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimator_weights_format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": "Check transform method of VotingClassifier on toy dataset." - }, - { - "name": "test_none_estimator_with_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_voting_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests.test_weight_boosting", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import coo_matrix", - "from scipy.sparse import dok_matrix", - "from scipy.sparse import lil_matrix", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_less", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regexp", - "from sklearn.base import BaseEstimator", - "from sklearn.base import clone", - "from sklearn.dummy import DummyClassifier", - "from sklearn.dummy import DummyRegressor", - "from sklearn.linear_model import LinearRegression", - "from sklearn.model_selection import train_test_split", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.ensemble import AdaBoostClassifier", - "from sklearn.ensemble import AdaBoostRegressor", - "from sklearn.ensemble._weight_boosting import _samme_proba", - "from sklearn.svm import SVC", - "from sklearn.svm import SVR", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.utils import shuffle", - "from sklearn.utils._mocking import NoSampleWeightWrapper", - "from sklearn 
import datasets", - "import pickle", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.ensemble import RandomForestRegressor" - ], - "classes": [], - "functions": [ - { - "name": "test_samme_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oneclass_adaboost_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_diabetes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_staged_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_sparse_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight_adaboost_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "AdaBoostRegressor should work without sample_weights in the base estimator\nThe random weighted sampling is done internally in the _boost method in\nAdaBoostRegressor." - }, - { - "name": "test_multidimensional_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that the AdaBoost estimators can work with n-dimensional\ndata matrix" - }, - { - "name": "test_adaboostclassifier_without_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_adaboostregressor_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_adaboost_consistent_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_adaboost_negative_weight_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.binning", - "imports": [ - "import numpy as np", - "from utils import check_random_state", - "from utils import check_array", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils.validation import check_is_fitted", - "from _binning import _map_to_bins", - "from 
common import X_DTYPE", - "from common import X_BINNED_DTYPE", - "from common import ALMOST_INF", - "from common import X_BITSET_INNER_DTYPE", - "from _bitset import set_bitset_memoryview" - ], - "classes": [ - { - "name": "_BinMapper", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_bins", - "type": "int", - "hasDefault": true, - "default": "256", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of bins to use (including the bin for missing values). Should be in [3, 256]. Non-missing values are binned on ``max_bins = n_bins - 1`` bins. The last bin is always reserved for missing values. If for a given feature the number of unique values is less than ``max_bins``, then those unique values will be used to compute the bin thresholds, instead of the quantiles. For categorical features indicated by ``is_categorical``, the docstring for ``is_categorical`` details on this procedure." - }, - { - "name": "subsample", - "type": "Optional[int]", - "hasDefault": true, - "default": "2e5", - "limitation": null, - "ignored": false, - "docstring": "If ``n_samples > subsample``, then ``sub_samples`` samples will be randomly chosen to compute the quantiles. If ``None``, the whole data is used." - }, - { - "name": "is_categorical", - "type": "NDArray[bool]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates categorical features. By default, all features are considered continuous." - }, - { - "name": "known_categories", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "For each categorical feature, the array indicates the set of unique categorical values. These should be the possible values over all the data, not just the training data. For continuous features, the corresponding entry should be None." 
- }, - { - "name": "random_state: int", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the random sub-sampling. Pass an int for reproducible output across multiple function calls. See :term: `Glossary `." - }, - { - "name": "RandomState instance or None", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the random sub-sampling. Pass an int for reproducible output across multiple function calls. See :term: `Glossary `." - }, - { - "name": "default=None", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the random sub-sampling. Pass an int for reproducible output across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to bin." - }, - { - "name": "y: None", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit data X by computing the binning thresholds.\n\nThe last bin is reserved for missing values, whether missing values\nare present in the data or not.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data to bin.\ny: None\n Ignored.\n\nReturns\n-------\nself : object" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to bin." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Bin data X.\n\nMissing values will be mapped to the last bin.\n\nFor categorical features, the mapping will be incorrect for unknown\ncategories. Since the BinMapper is given known_categories of the\nentire training data (i.e. before the call to train_test_split() in\ncase of early-stopping), this never happens.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data to bin.\n\nReturns\n-------\nX_binned : array-like of shape (n_samples, n_features)\n The binned data (fortran-aligned)." - }, - { - "name": "make_known_categories_bitsets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Create bitsets of known categories.\n\nReturns\n-------\n- known_cat_bitsets : ndarray of shape (n_categorical_features, 8)\n Array of bitsets of known categories, for each categorical feature.\n- f_idx_map : ndarray of shape (n_features,)\n Map from original feature index to the corresponding index in the\n known_cat_bitsets array." - } - ], - "docstring": "Transformer that maps a dataset into integer-valued bins.\n\nFor continuous features, the bins are created in a feature-wise fashion,\nusing quantiles so that each bins contains approximately the same number\nof samples. 
For large datasets, quantiles are computed on a subset of the\ndata to speed-up the binning, but the quantiles should remain stable.\n\nFor categorical features, the raw categorical values are expected to be\nin [0, 254] (this is not validated here though) and each category\ncorresponds to a bin. All categorical values must be known at\ninitialization: transform() doesn't know how to bin unknown categorical\nvalues. Note that transform() is only used on non-training data in the\ncase of early stopping.\n\nFeatures with a small number of values may be binned into less than\n``n_bins`` bins. The last bin (at index ``n_bins - 1``) is always reserved\nfor missing values.\n\nParameters\n----------\nn_bins : int, default=256\n The maximum number of bins to use (including the bin for missing\n values). Should be in [3, 256]. Non-missing values are binned on\n ``max_bins = n_bins - 1`` bins. The last bin is always reserved for\n missing values. If for a given feature the number of unique values is\n less than ``max_bins``, then those unique values will be used to\n compute the bin thresholds, instead of the quantiles. For categorical\n features indicated by ``is_categorical``, the docstring for\n ``is_categorical`` details on this procedure.\nsubsample : int or None, default=2e5\n If ``n_samples > subsample``, then ``sub_samples`` samples will be\n randomly chosen to compute the quantiles. If ``None``, the whole data\n is used.\nis_categorical : ndarray of bool of shape (n_features,), default=None\n Indicates categorical features. By default, all features are\n considered continuous.\nknown_categories : list of {ndarray, None} of shape (n_features,), default=none\n For each categorical feature, the array indicates the set of unique\n categorical values. These should be the possible values over all the\n data, not just the training data. 
For continuous features, the\n corresponding entry should be None.\nrandom_state: int, RandomState instance or None, default=None\n Pseudo-random number generator to control the random sub-sampling.\n Pass an int for reproducible output across multiple\n function calls.\n See :term: `Glossary `.\n\nAttributes\n----------\nbin_thresholds_ : list of ndarray\n For each feature, each array indicates how to map a feature into a\n binned feature. The semantic and size depends on the nature of the\n feature:\n - for real-valued features, the array corresponds to the real-valued\n bin thresholds (the upper bound of each bin). There are ``max_bins\n - 1`` thresholds, where ``max_bins = n_bins - 1`` is the number of\n bins used for non-missing values.\n - for categorical features, the array is a map from a binned category\n value to the raw category value. The size of the array is equal to\n ``min(max_bins, category_cardinality)`` where we ignore missing\n values in the cardinality.\nn_bins_non_missing_ : ndarray, dtype=np.uint32\n For each feature, gives the number of bins actually used for\n non-missing values. For features with a lot of unique values, this is\n equal to ``n_bins - 1``.\nis_categorical_ : ndarray of shape (n_features,), dtype=np.uint8\n Indicator for categorical features.\nmissing_values_bin_idx_ : np.uint8\n The index of the bin where missing values are mapped. This is a\n constant across all features. This corresponds to the last bin, and\n it is always equal to ``n_bins - 1``. Note that if ``n_bins_missing_``\n is less than ``n_bins - 1`` for a given feature, then there are\n empty (and unused) bins." 
- } - ], - "functions": [ - { - "name": "_find_binning_thresholds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Extract quantiles from a continuous feature.\n\nMissing values are ignored for finding the thresholds.\n\nParameters\n----------\ncol_data : array-like, shape (n_samples,)\n The continuous feature to bin.\nmax_bins: int\n The maximum number of bins to use for non-missing values. If for a\n given feature the number of unique values is less than ``max_bins``,\n then those unique values will be used to compute the bin thresholds,\n instead of the quantiles\n\nReturn\n------\nbinning_thresholds : ndarray of shape(min(max_bins, n_unique_values) - 1,)\n The increasing numeric values that can be used to separate the bins.\n A given value x will be mapped into bin value i iff\n bining_thresholds[i - 1] < x <= binning_thresholds[i]" - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting", - "imports": [ - "from abc import ABC", - "from abc import abstractmethod", - "from functools import partial", - "import numpy as np", - "from timeit import default_timer as time", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from base import ClassifierMixin", - "from base import is_classifier", - "from utils import check_random_state", - "from utils import check_array", - "from utils import resample", - "from utils.validation import check_is_fitted", - "from utils.validation import check_consistent_length", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import check_classification_targets", - "from metrics import check_scoring", - "from model_selection import train_test_split", - "from preprocessing import LabelEncoder", - "from _gradient_boosting import _update_raw_predictions", - "from common import Y_DTYPE", - "from common import X_DTYPE", - "from common import 
X_BINNED_DTYPE", - "from binning import _BinMapper", - "from grower import TreeGrower", - "from loss import _LOSSES", - "from loss import BaseLoss" - ], - "classes": [ - { - "name": "BaseHistGradientBoosting", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate parameters passed to __init__.\n\nThe parameters that are directly passed to the grower are checked in\nTreeGrower." - }, - { - "name": "_check_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check and validate categorical features in X\n\nReturn\n------\nis_categorical : ndarray of shape (n_features,) or None, dtype=bool\n Indicates whether a feature is categorical. If no feature is\n categorical, this is None.\nknown_categories : list of size n_features or None\n The list contains, for each feature:\n - an array of shape (n_categories,) with the unique cat values\n - None if the feature is not categorical\n None if no feature is categorical." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights of training data. .. 
versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the gradient boosting model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,) default=None\n Weights of training data.\n\n .. versionadded:: 0.23\n\nReturns\n-------\nself : object" - }, - { - "name": "_is_fitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_clear_state", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Clear the state of the gradient boosting model." - }, - { - "name": "_get_small_trainset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the indices of the subsample set and return this set.\n\nFor efficiency, we need to subsample the training set to compute scores\nwith scorers." - }, - { - "name": "_check_early_stopping_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if fitting should be early-stopped based on scorer.\n\nScores are computed on validation data or on training data." - }, - { - "name": "_check_early_stopping_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if fitting should be early-stopped based on loss.\n\nScores are computed on validation data or on training data." - }, - { - "name": "_should_stop", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return True (do early stopping) if the last n scores aren't better\nthan the (n-1)th-to-last score, up to some tolerance." 
- }, - { - "name": "_bin_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Bin data X.\n\nIf is_training_data, then fit the _bin_mapper attribute.\nElse, the binned data is converted to a C-contiguous array." - }, - { - "name": "_print_iteration_stats", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Print info about the current fitting iteration." - }, - { - "name": "_raw_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the sum of the leaves values over all predictors.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\nraw_predictions : array, shape (n_trees_per_iteration, n_samples)\n The raw predicted values." - }, - { - "name": "_predict_iterations", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Add the predictions of the predictors to raw_predictions." - }, - { - "name": "_staged_raw_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute raw predictions of ``X`` for each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nYields\n-------\nraw_predictions : generator of ndarray of shape (n_trees_per_iteration, n_samples)\n The raw predictions of the input samples. 
The order of the\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "_compute_partial_dependence_recursion", - "decorators": [], - "parameters": [ - { - "name": "grid", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The grid points on which the partial dependence should be evaluated." - }, - { - "name": "target_features", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of target features for which the partial dependence should be evaluated." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fast partial dependence computation.\n\nParameters\n----------\ngrid : ndarray, shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\ntarget_features : ndarray, shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\n\nReturns\n-------\naveraged_predictions : ndarray, shape (n_trees_per_iteration, n_samples)\n The value of the partial dependence function on each grid point." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_encode_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "n_iter_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for histogram-based gradient boosting estimators." 
- }, - { - "name": "HistGradientBoostingRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "loss", - "type": "Literal['least_squares', 'least_absolute_deviation', 'poisson']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The loss function to use in the boosting process. Note that the \"least squares\" and \"poisson\" losses actually implement \"half least squares loss\" and \"half poisson deviance\" to simplify the computation of the gradient. Furthermore, \"poisson\" loss internally uses a log-link and requires ``y >= 0`` .. versionchanged:: 0.23 Added option 'poisson'." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The learning rate, also known as *shrinkage*. This is used as a multiplicative factor for the leaves values. Use ``1`` for no shrinkage." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations of the boosting process, i.e. the maximum number of trees." - }, - { - "name": "max_leaf_nodes", - "type": "Optional[int]", - "hasDefault": true, - "default": "31", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of leaves for each tree. Must be strictly greater than 1. If None, there is no maximum limit." - }, - { - "name": "max_depth", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of each tree. The depth of a tree is the number of edges to go from the root to the deepest leaf. Depth isn't constrained by default." 
- }, - { - "name": "min_samples_leaf", - "type": "int", - "hasDefault": true, - "default": "20", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples per leaf. For small datasets with less than a few hundred samples, it is recommended to lower this value since only very shallow trees would be built." - }, - { - "name": "l2_regularization", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The L2 regularization parameter. Use ``0`` for no regularization (default)." - }, - { - "name": "max_bins", - "type": "int", - "hasDefault": true, - "default": "255", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of bins to use for non-missing values. Before training, each feature of the input array `X` is binned into integer-valued bins, which allows for a much faster training stage. Features with a small number of unique values may use less than ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin is always reserved for missing values. Must be no larger than 255." - }, - { - "name": "monotonic_cst", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates the monotonic constraint to enforce on each feature. -1, 1 and 0 respectively correspond to a negative constraint, positive constraint and no constraint. Read more in the :ref:`User Guide `. .. versionadded:: 0.23" - }, - { - "name": "categorical_features", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates the categorical features. - None : no feature will be considered categorical. - boolean array-like : boolean mask indicating categorical features. - integer array-like : integer indices indicating categorical features. 
For each categorical feature, there must be at most `max_bins` unique categories, and each categorical value must be in [0, max_bins -1]. Read more in the :ref:`User Guide `. .. versionadded:: 0.24" - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble. For results to be valid, the estimator should be re-trained on the same data only. See :term:`the Glossary `." - }, - { - "name": "early_stopping", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If 'auto', early stopping is enabled if the sample size is larger than 10000. If True, early stopping is enabled, otherwise early stopping is disabled. .. versionadded:: 0.23" - }, - { - "name": "scoring", - "type": "Optional[Union[Callable, str]]", - "hasDefault": true, - "default": "'loss'", - "limitation": null, - "ignored": false, - "docstring": "Scoring parameter to use for early stopping. It can be a single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`). If None, the estimator's default scorer is used. If ``scoring='loss'``, early stopping is checked w.r.t the loss value. Only used if early stopping is performed." - }, - { - "name": "validation_fraction", - "type": "Optional[Union[float, int]]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Proportion (or absolute size) of training data to set aside as validation data for early stopping. If None, early stopping is done on the training data. Only used if early stopping is performed." - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Used to determine when to \"early stop\". 
The fitting process is stopped when none of the last ``n_iter_no_change`` scores are better than the ``n_iter_no_change - 1`` -th-to-last one, up to some tolerance. Only used if early stopping is performed." - }, - { - "name": "tol", - "type": "Optional[float]", - "hasDefault": true, - "default": "1e-7", - "limitation": null, - "ignored": false, - "docstring": "The absolute tolerance to use when comparing scores during early stopping. The higher the tolerance, the more likely we are to early stop: higher tolerance means that it will be harder for subsequent iterations to be considered an improvement upon the reference score." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level. If not zero, print some information about the fitting process." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the subsampling in the binning process, and the train/validation data split if early stopping is enabled. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict values for X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\ny : ndarray, shape (n_samples,)\n The predicted values." 
- }, - { - "name": "staged_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict regression target for each iteration\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nYields\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted values of the input samples, for each iteration." - }, - { - "name": "_encode_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Histogram-based Gradient Boosting Regression Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingRegressor`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM `_.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. 
To use it,\n you need to explicitly import ``enable_hist_gradient_boosting``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n >>> # now you can import normally from ensemble\n >>> from sklearn.ensemble import HistGradientBoostingRegressor\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\nParameters\n----------\nloss : {'least_squares', 'least_absolute_deviation', 'poisson'}, default='least_squares'\n The loss function to use in the boosting process. Note that the\n \"least squares\" and \"poisson\" losses actually implement\n \"half least squares loss\" and \"half poisson deviance\" to simplify the\n computation of the gradient. Furthermore, \"poisson\" loss internally\n uses a log-link and requires ``y >= 0``\n\n .. versionchanged:: 0.23\n Added option 'poisson'.\n\nlearning_rate : float, default=0.1\n The learning rate, also known as *shrinkage*. This is used as a\n multiplicative factor for the leaves values. Use ``1`` for no\n shrinkage.\nmax_iter : int, default=100\n The maximum number of iterations of the boosting process, i.e. the\n maximum number of trees.\nmax_leaf_nodes : int or None, default=31\n The maximum number of leaves for each tree. Must be strictly greater\n than 1. If None, there is no maximum limit.\nmax_depth : int or None, default=None\n The maximum depth of each tree. The depth of a tree is the number of\n edges to go from the root to the deepest leaf.\n Depth isn't constrained by default.\nmin_samples_leaf : int, default=20\n The minimum number of samples per leaf. For small datasets with less\n than a few hundred samples, it is recommended to lower this value\n since only very shallow trees would be built.\nl2_regularization : float, default=0\n The L2 regularization parameter. Use ``0`` for no regularization\n (default).\nmax_bins : int, default=255\n The maximum number of bins to use for non-missing values. 
Before\n training, each feature of the input array `X` is binned into\n integer-valued bins, which allows for a much faster training stage.\n Features with a small number of unique values may use less than\n ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\n is always reserved for missing values. Must be no larger than 255.\nmonotonic_cst : array-like of int of shape (n_features), default=None\n Indicates the monotonic constraint to enforce on each feature. -1, 1\n and 0 respectively correspond to a negative constraint, positive\n constraint and no constraint. Read more in the :ref:`User Guide\n `.\n\n .. versionadded:: 0.23\n\ncategorical_features : array-like of {bool, int} of shape (n_features) or shape (n_categorical_features,), default=None.\n Indicates the categorical features.\n\n - None : no feature will be considered categorical.\n - boolean array-like : boolean mask indicating categorical features.\n - integer array-like : integer indices indicating categorical\n features.\n\n For each categorical feature, there must be at most `max_bins` unique\n categories, and each categorical value must be in [0, max_bins -1].\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.24\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble. For results to be valid, the\n estimator should be re-trained on the same data only.\n See :term:`the Glossary `.\nearly_stopping : 'auto' or bool, default='auto'\n If 'auto', early stopping is enabled if the sample size is larger than\n 10000. If True, early stopping is enabled, otherwise early stopping is\n disabled.\n\n .. versionadded:: 0.23\n\nscoring : str or callable or None, default='loss'\n Scoring parameter to use for early stopping. It can be a single\n string (see :ref:`scoring_parameter`) or a callable (see\n :ref:`scoring`). If None, the estimator's default scorer is used. 
If\n ``scoring='loss'``, early stopping is checked w.r.t the loss value.\n Only used if early stopping is performed.\nvalidation_fraction : int or float or None, default=0.1\n Proportion (or absolute size) of training data to set aside as\n validation data for early stopping. If None, early stopping is done on\n the training data. Only used if early stopping is performed.\nn_iter_no_change : int, default=10\n Used to determine when to \"early stop\". The fitting process is\n stopped when none of the last ``n_iter_no_change`` scores are better\n than the ``n_iter_no_change - 1`` -th-to-last one, up to some\n tolerance. Only used if early stopping is performed.\ntol : float or None, default=1e-7\n The absolute tolerance to use when comparing scores during early\n stopping. The higher the tolerance, the more likely we are to early\n stop: higher tolerance means that it will be harder for subsequent\n iterations to be considered an improvement upon the reference score.\nverbose : int, default=0\n The verbosity level. If not zero, print some information about the\n fitting process.\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the subsampling in the\n binning process, and the train/validation data split if early stopping\n is enabled.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ndo_early_stopping_ : bool\n Indicates whether early stopping is used during training.\nn_iter_ : int\n The number of iterations as selected by early stopping, depending on\n the `early_stopping` parameter. Otherwise it corresponds to max_iter.\nn_trees_per_iteration_ : int\n The number of tree that are built at each iteration. For regressors,\n this is always 1.\ntrain_score_ : ndarray, shape (n_iter_+1,)\n The scores at each iteration on the training data. The first entry\n is the score of the ensemble before the first iteration. 
Scores are\n computed according to the ``scoring`` parameter. If ``scoring`` is\n not 'loss', scores are computed on a subset of at most 10 000\n samples. Empty if no early stopping.\nvalidation_score_ : ndarray, shape (n_iter_+1,)\n The scores at each iteration on the held-out validation data. The\n first entry is the score of the ensemble before the first iteration.\n Scores are computed according to the ``scoring`` parameter. Empty if\n no early stopping or if ``validation_fraction`` is None.\nis_categorical_ : ndarray, shape (n_features, ) or None\n Boolean mask for the categorical features. ``None`` if there are no\n categorical features.\n\nExamples\n--------\n>>> # To use this experimental feature, we need to explicitly ask for it:\n>>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n>>> from sklearn.ensemble import HistGradientBoostingRegressor\n>>> from sklearn.datasets import load_diabetes\n>>> X, y = load_diabetes(return_X_y=True)\n>>> est = HistGradientBoostingRegressor().fit(X, y)\n>>> est.score(X, y)\n0.92..." - }, - { - "name": "HistGradientBoostingClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "loss", - "type": "Literal['auto', 'binary_crossentropy', 'categorical_crossentropy']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The loss function to use in the boosting process. 'binary_crossentropy' (also known as logistic loss) is used for binary classification and generalizes to 'categorical_crossentropy' for multiclass classification. 'auto' will automatically choose either loss depending on the nature of the problem." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The learning rate, also known as *shrinkage*. 
This is used as a multiplicative factor for the leaves values. Use ``1`` for no shrinkage." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations of the boosting process, i.e. the maximum number of trees for binary classification. For multiclass classification, `n_classes` trees per iteration are built." - }, - { - "name": "max_leaf_nodes", - "type": "Optional[int]", - "hasDefault": true, - "default": "31", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of leaves for each tree. Must be strictly greater than 1. If None, there is no maximum limit." - }, - { - "name": "max_depth", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of each tree. The depth of a tree is the number of edges to go from the root to the deepest leaf. Depth isn't constrained by default." - }, - { - "name": "min_samples_leaf", - "type": "int", - "hasDefault": true, - "default": "20", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples per leaf. For small datasets with less than a few hundred samples, it is recommended to lower this value since only very shallow trees would be built." - }, - { - "name": "l2_regularization", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The L2 regularization parameter. Use 0 for no regularization." - }, - { - "name": "max_bins", - "type": "int", - "hasDefault": true, - "default": "255", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of bins to use for non-missing values. Before training, each feature of the input array `X` is binned into integer-valued bins, which allows for a much faster training stage. 
Features with a small number of unique values may use less than ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin is always reserved for missing values. Must be no larger than 255." - }, - { - "name": "monotonic_cst", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates the monotonic constraint to enforce on each feature. -1, 1 and 0 respectively correspond to a negative constraint, positive constraint and no constraint. Read more in the :ref:`User Guide `. .. versionadded:: 0.23" - }, - { - "name": "categorical_features", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates the categorical features. - None : no feature will be considered categorical. - boolean array-like : boolean mask indicating categorical features. - integer array-like : integer indices indicating categorical features. For each categorical feature, there must be at most `max_bins` unique categories, and each categorical value must be in [0, max_bins -1]. Read more in the :ref:`User Guide `. .. versionadded:: 0.24" - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble. For results to be valid, the estimator should be re-trained on the same data only. See :term:`the Glossary `." - }, - { - "name": "early_stopping", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If 'auto', early stopping is enabled if the sample size is larger than 10000. If True, early stopping is enabled, otherwise early stopping is disabled. .. 
versionadded:: 0.23" - }, - { - "name": "scoring", - "type": "Optional[Union[Callable, str]]", - "hasDefault": true, - "default": "'loss'", - "limitation": null, - "ignored": false, - "docstring": "Scoring parameter to use for early stopping. It can be a single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`). If None, the estimator's default scorer is used. If ``scoring='loss'``, early stopping is checked w.r.t the loss value. Only used if early stopping is performed." - }, - { - "name": "validation_fraction", - "type": "Optional[Union[float, int]]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Proportion (or absolute size) of training data to set aside as validation data for early stopping. If None, early stopping is done on the training data. Only used if early stopping is performed." - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Used to determine when to \"early stop\". The fitting process is stopped when none of the last ``n_iter_no_change`` scores are better than the ``n_iter_no_change - 1`` -th-to-last one, up to some tolerance. Only used if early stopping is performed." - }, - { - "name": "tol", - "type": "Optional[float]", - "hasDefault": true, - "default": "1e-7", - "limitation": null, - "ignored": false, - "docstring": "The absolute tolerance to use when comparing scores. The higher the tolerance, the more likely we are to early stop: higher tolerance means that it will be harder for subsequent iterations to be considered an improvement upon the reference score." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level. If not zero, print some information about the fitting process." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the subsampling in the binning process, and the train/validation data split if early stopping is enabled. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict classes for X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\ny : ndarray, shape (n_samples,)\n The predicted classes." - }, - { - "name": "staged_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict classes at each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nYields\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted classes of the input samples, for each iteration." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\np : ndarray, shape (n_samples, n_classes)\n The class probabilities of the input samples." - }, - { - "name": "staged_predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities at each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nYields\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted class probabilities of the input samples,\n for each iteration." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the decision function of ``X``.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\ndecision : ndarray, shape (n_samples,) or (n_samples, n_trees_per_iteration)\n The raw predicted values (i.e. the sum of the trees leaves) for\n each sample. n_trees_per_iteration is equal to the number of\n classes in multiclass classification." - }, - { - "name": "staged_decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute decision function of ``X`` for each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nYields\n-------\ndecision : generator of ndarray of shape (n_samples,) or (n_samples, n_trees_per_iteration)\n The decision function of the input samples, which corresponds to\n the raw values predicted from the trees of the ensemble . The\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "_encode_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Histogram-based Gradient Boosting Classification Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingClassifier`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM `_.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. 
To use it,\n you need to explicitly import ``enable_hist_gradient_boosting``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n >>> # now you can import normally from ensemble\n >>> from sklearn.ensemble import HistGradientBoostingClassifier\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\nParameters\n----------\nloss : {'auto', 'binary_crossentropy', 'categorical_crossentropy'}, default='auto'\n The loss function to use in the boosting process. 'binary_crossentropy'\n (also known as logistic loss) is used for binary classification and\n generalizes to 'categorical_crossentropy' for multiclass\n classification. 'auto' will automatically choose either loss depending\n on the nature of the problem.\nlearning_rate : float, default=0.1\n The learning rate, also known as *shrinkage*. This is used as a\n multiplicative factor for the leaves values. Use ``1`` for no\n shrinkage.\nmax_iter : int, default=100\n The maximum number of iterations of the boosting process, i.e. the\n maximum number of trees for binary classification. For multiclass\n classification, `n_classes` trees per iteration are built.\nmax_leaf_nodes : int or None, default=31\n The maximum number of leaves for each tree. Must be strictly greater\n than 1. If None, there is no maximum limit.\nmax_depth : int or None, default=None\n The maximum depth of each tree. The depth of a tree is the number of\n edges to go from the root to the deepest leaf.\n Depth isn't constrained by default.\nmin_samples_leaf : int, default=20\n The minimum number of samples per leaf. For small datasets with less\n than a few hundred samples, it is recommended to lower this value\n since only very shallow trees would be built.\nl2_regularization : float, default=0\n The L2 regularization parameter. Use 0 for no regularization.\nmax_bins : int, default=255\n The maximum number of bins to use for non-missing values. 
Before\n training, each feature of the input array `X` is binned into\n integer-valued bins, which allows for a much faster training stage.\n Features with a small number of unique values may use less than\n ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\n is always reserved for missing values. Must be no larger than 255.\nmonotonic_cst : array-like of int of shape (n_features), default=None\n Indicates the monotonic constraint to enforce on each feature. -1, 1\n and 0 respectively correspond to a negative constraint, positive\n constraint and no constraint. Read more in the :ref:`User Guide\n `.\n\n .. versionadded:: 0.23\n\ncategorical_features : array-like of {bool, int} of shape (n_features) or shape (n_categorical_features,), default=None.\n Indicates the categorical features.\n\n - None : no feature will be considered categorical.\n - boolean array-like : boolean mask indicating categorical features.\n - integer array-like : integer indices indicating categorical\n features.\n\n For each categorical feature, there must be at most `max_bins` unique\n categories, and each categorical value must be in [0, max_bins -1].\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.24\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble. For results to be valid, the\n estimator should be re-trained on the same data only.\n See :term:`the Glossary `.\nearly_stopping : 'auto' or bool, default='auto'\n If 'auto', early stopping is enabled if the sample size is larger than\n 10000. If True, early stopping is enabled, otherwise early stopping is\n disabled.\n\n .. versionadded:: 0.23\n\nscoring : str or callable or None, default='loss'\n Scoring parameter to use for early stopping. It can be a single\n string (see :ref:`scoring_parameter`) or a callable (see\n :ref:`scoring`). If None, the estimator's default scorer\n is used. 
If ``scoring='loss'``, early stopping is checked\n w.r.t the loss value. Only used if early stopping is performed.\nvalidation_fraction : int or float or None, default=0.1\n Proportion (or absolute size) of training data to set aside as\n validation data for early stopping. If None, early stopping is done on\n the training data. Only used if early stopping is performed.\nn_iter_no_change : int, default=10\n Used to determine when to \"early stop\". The fitting process is\n stopped when none of the last ``n_iter_no_change`` scores are better\n than the ``n_iter_no_change - 1`` -th-to-last one, up to some\n tolerance. Only used if early stopping is performed.\ntol : float or None, default=1e-7\n The absolute tolerance to use when comparing scores. The higher the\n tolerance, the more likely we are to early stop: higher tolerance\n means that it will be harder for subsequent iterations to be\n considered an improvement upon the reference score.\nverbose : int, default=0\n The verbosity level. If not zero, print some information about the\n fitting process.\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the subsampling in the\n binning process, and the train/validation data split if early stopping\n is enabled.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nclasses_ : array, shape = (n_classes,)\n Class labels.\ndo_early_stopping_ : bool\n Indicates whether early stopping is used during training.\nn_iter_ : int\n The number of iterations as selected by early stopping, depending on\n the `early_stopping` parameter. Otherwise it corresponds to max_iter.\nn_trees_per_iteration_ : int\n The number of tree that are built at each iteration. This is equal to 1\n for binary classification, and to ``n_classes`` for multiclass\n classification.\ntrain_score_ : ndarray, shape (n_iter_+1,)\n The scores at each iteration on the training data. 
The first entry\n is the score of the ensemble before the first iteration. Scores are\n computed according to the ``scoring`` parameter. If ``scoring`` is\n not 'loss', scores are computed on a subset of at most 10 000\n samples. Empty if no early stopping.\nvalidation_score_ : ndarray, shape (n_iter_+1,)\n The scores at each iteration on the held-out validation data. The\n first entry is the score of the ensemble before the first iteration.\n Scores are computed according to the ``scoring`` parameter. Empty if\n no early stopping or if ``validation_fraction`` is None.\nis_categorical_ : ndarray, shape (n_features, ) or None\n Boolean mask for the categorical features. ``None`` if there are no\n categorical features.\n\nExamples\n--------\n>>> # To use this experimental feature, we need to explicitly ask for it:\n>>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n>>> from sklearn.ensemble import HistGradientBoostingClassifier\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = HistGradientBoostingClassifier().fit(X, y)\n>>> clf.score(X, y)\n1.0" - } - ], - "functions": [] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.grower", - "imports": [ - "from heapq import heappush", - "from heapq import heappop", - "import numpy as np", - "from timeit import default_timer as time", - "import numbers", - "from splitting import Splitter", - "from histogram import HistogramBuilder", - "from predictor import TreePredictor", - "from utils import sum_parallel", - "from common import PREDICTOR_RECORD_DTYPE", - "from common import X_BITSET_INNER_DTYPE", - "from common import Y_DTYPE", - "from common import MonotonicConstraint", - "from _bitset import set_raw_bitset_from_binned_bitset" - ], - "classes": [ - { - "name": "TreeNode", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "depth", - "type": "int", - "hasDefault": 
false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The depth of the node, i.e. its distance from the root." - }, - { - "name": "sample_indices", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The indices of the samples at the node." - }, - { - "name": "sum_gradients", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sum of the gradients of the samples at the node." - }, - { - "name": "sum_hessians", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sum of the hessians of the samples at the node." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_children_bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set children values bounds to respect monotonic constraints." - }, - { - "name": "__lt__", - "decorators": [], - "parameters": [ - { - "name": "other_node", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The node to compare with." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Comparison for priority queue.\n\nNodes with high gain are higher priority than nodes with low gain.\n\nheapq.heappush only need the '<' operator.\nheapq.heappop take the smallest item first (smaller is higher\npriority).\n\nParameters\n----------\nother_node : TreeNode\n The node to compare with." - } - ], - "docstring": "Tree Node class used in TreeGrower.\n\nThis isn't used for prediction purposes, only for training (see\nTreePredictor).\n\nParameters\n----------\ndepth : int\n The depth of the node, i.e. 
its distance from the root.\nsample_indices : ndarray of shape (n_samples_at_node,), dtype=np.uint\n The indices of the samples at the node.\nsum_gradients : float\n The sum of the gradients of the samples at the node.\nsum_hessians : float\n The sum of the hessians of the samples at the node.\n\nAttributes\n----------\ndepth : int\n The depth of the node, i.e. its distance from the root.\nsample_indices : ndarray of shape (n_samples_at_node,), dtype=np.uint\n The indices of the samples at the node.\nsum_gradients : float\n The sum of the gradients of the samples at the node.\nsum_hessians : float\n The sum of the hessians of the samples at the node.\nsplit_info : SplitInfo or None\n The result of the split evaluation.\nleft_child : TreeNode or None\n The left child of the node. None for leaves.\nright_child : TreeNode or None\n The right child of the node. None for leaves.\nvalue : float or None\n The value of the leaf, as computed in finalize_leaf(). None for\n non-leaf nodes.\npartition_start : int\n start position of the node's sample_indices in splitter.partition.\npartition_stop : int\n stop position of the node's sample_indices in splitter.partition." - }, - { - "name": "TreeGrower", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "X_binned", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The binned input samples. Must be Fortran-aligned." - }, - { - "name": "gradients", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The gradients of each training sample. Those are the gradients of the loss w.r.t the predictions, evaluated at iteration ``i - 1``." 
- }, - { - "name": "hessians", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The hessians of each training sample. Those are the hessians of the loss w.r.t the predictions, evaluated at iteration ``i - 1``." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of leaves for each tree. If None, there is no maximum limit." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of each tree. The depth of a tree is the number of edges to go from the root to the deepest leaf. Depth isn't constrained by default." - }, - { - "name": "min_samples_leaf", - "type": "int", - "hasDefault": true, - "default": "20", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples per leaf." - }, - { - "name": "min_gain_to_split", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum gain needed to split a node. Splits with lower gain will be ignored." - }, - { - "name": "n_bins", - "type": "int", - "hasDefault": true, - "default": "256", - "limitation": null, - "ignored": false, - "docstring": "The total number of bins, including the bin for missing values. Used to define the shape of the histograms." - }, - { - "name": "n_bins_non_missing", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "For each feature, gives the number of bins actually used for non-missing values. For features with a lot of unique values, this is equal to ``n_bins - 1``. If it's an int, all features are considered to have the same number of bins. If None, all features are considered to have ``n_bins - 1`` bins." 
- }, - { - "name": "has_missing_values", - "type": "Union[NDArray, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether each feature contains missing values (in the training data). If it's a bool, the same value is used for all features." - }, - { - "name": "is_categorical", - "type": "NDArray[bool]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates categorical features." - }, - { - "name": "monotonic_cst", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates the monotonic constraint to enforce on each feature. -1, 1 and 0 respectively correspond to a positive constraint, negative constraint and no constraint. Read more in the :ref:`User Guide `." - }, - { - "name": "l2_regularization", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The L2 regularization parameter." - }, - { - "name": "min_hessian_to_split", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "The minimum sum of hessians needed in each node. Splits that result in at least one child having a sum of hessians less than ``min_hessian_to_split`` are discarded." - }, - { - "name": "shrinkage", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The shrinkage parameter to apply to the leaves values, also known as learning rate." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate parameters passed to __init__.\n\nAlso validate parameters passed to splitter." 
- }, - { - "name": "grow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Grow the tree, from root to leaves." - }, - { - "name": "_apply_shrinkage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Multiply leaves values by shrinkage parameter.\n\nThis must be done at the very end of the growing process. If this were\ndone during the growing process e.g. in finalize_leaf(), then a leaf\nwould be shrunk but its sibling would potentially not be (if it's a\nnon-leaf), which would lead to a wrong computation of the 'middle'\nvalue needed to enforce the monotonic constraints." - }, - { - "name": "_intilialize_root", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialize root node and finalize it if needed." - }, - { - "name": "_compute_best_split_and_push", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the best possible split (SplitInfo) of a given node.\n\nAlso push it in the heap of splittable nodes if gain isn't zero.\nThe gain of a node is 0 if either all the leaves are pure\n(best gain = 0), or if no split would satisfy the constraints,\n(min_hessians_to_split, min_gain_to_split, min_samples_leaf)" - }, - { - "name": "split_next", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Split the node with highest potential gain.\n\nReturns\n-------\nleft : TreeNode\n The resulting left child.\nright : TreeNode\n The resulting right child." - }, - { - "name": "_finalize_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make node a leaf of the tree being grown." 
- }, - { - "name": "_finalize_splittable_nodes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform all splittable nodes into leaves.\n\nUsed when some constraint is met e.g. maximum number of leaves or\nmaximum depth." - }, - { - "name": "make_predictor", - "decorators": [], - "parameters": [ - { - "name": "binning_thresholds", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Corresponds to the bin_thresholds_ attribute of the BinMapper. For each feature, this stores: - the bin frontiers for continuous features - the unique raw category values for categorical features" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Make a TreePredictor object out of the current tree.\n\nParameters\n----------\nbinning_thresholds : array-like of floats\n Corresponds to the bin_thresholds_ attribute of the BinMapper.\n For each feature, this stores:\n\n - the bin frontiers for continuous features\n - the unique raw category values for categorical features\n\nReturns\n-------\nA TreePredictor object." - } - ], - "docstring": "Tree grower class used to build a tree.\n\nThe tree is fitted to predict the values of a Newton-Raphson step. The\nsplits are considered in a best-first fashion, and the quality of a\nsplit is defined in splitting._split_gain.\n\nParameters\n----------\nX_binned : ndarray of shape (n_samples, n_features), dtype=np.uint8\n The binned input samples. Must be Fortran-aligned.\ngradients : ndarray of shape (n_samples,)\n The gradients of each training sample. Those are the gradients of the\n loss w.r.t the predictions, evaluated at iteration ``i - 1``.\nhessians : ndarray of shape (n_samples,)\n The hessians of each training sample. 
Those are the hessians of the\n loss w.r.t the predictions, evaluated at iteration ``i - 1``.\nmax_leaf_nodes : int, default=None\n The maximum number of leaves for each tree. If None, there is no\n maximum limit.\nmax_depth : int, default=None\n The maximum depth of each tree. The depth of a tree is the number of\n edges to go from the root to the deepest leaf.\n Depth isn't constrained by default.\nmin_samples_leaf : int, default=20\n The minimum number of samples per leaf.\nmin_gain_to_split : float, default=0.\n The minimum gain needed to split a node. Splits with lower gain will\n be ignored.\nn_bins : int, default=256\n The total number of bins, including the bin for missing values. Used\n to define the shape of the histograms.\nn_bins_non_missing : ndarray, dtype=np.uint32, default=None\n For each feature, gives the number of bins actually used for\n non-missing values. For features with a lot of unique values, this\n is equal to ``n_bins - 1``. If it's an int, all features are\n considered to have the same number of bins. If None, all features\n are considered to have ``n_bins - 1`` bins.\nhas_missing_values : bool or ndarray, dtype=bool, default=False\n Whether each feature contains missing values (in the training data).\n If it's a bool, the same value is used for all features.\nis_categorical : ndarray of bool of shape (n_features,), default=None\n Indicates categorical features.\nmonotonic_cst : array-like of shape (n_features,), dtype=int, default=None\n Indicates the monotonic constraint to enforce on each feature. -1, 1\n and 0 respectively correspond to a positive constraint, negative\n constraint and no constraint. Read more in the :ref:`User Guide\n `.\nl2_regularization : float, default=0.\n The L2 regularization parameter.\nmin_hessian_to_split : float, default=1e-3\n The minimum sum of hessians needed in each node. 
Splits that result in\n at least one child having a sum of hessians less than\n ``min_hessian_to_split`` are discarded.\nshrinkage : float, default=1.\n The shrinkage parameter to apply to the leaves values, also known as\n learning rate." - } - ], - "functions": [ - { - "name": "_fill_predictor_arrays", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper used in make_predictor to set the TreePredictor fields." - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.loss", - "imports": [ - "from abc import ABC", - "from abc import abstractmethod", - "import numpy as np", - "from scipy.special import expit", - "from scipy.special import logsumexp", - "from scipy.special import xlogy", - "from common import Y_DTYPE", - "from common import G_H_DTYPE", - "from _loss import _update_gradients_least_squares", - "from _loss import _update_gradients_hessians_least_squares", - "from _loss import _update_gradients_least_absolute_deviation", - "from _loss import _update_gradients_hessians_least_absolute_deviation", - "from _loss import _update_gradients_hessians_binary_crossentropy", - "from _loss import _update_gradients_hessians_categorical_crossentropy", - "from _loss import _update_gradients_hessians_poisson", - "from utils.stats import _weighted_percentile" - ], - "classes": [ - { - "name": "BaseLoss", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the weighted average loss" - }, - { - "name": "pointwise_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return loss value for each input" - }, - { - "name": "init_gradients_and_hessians", - "decorators": [], - 
"parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of samples passed to `fit()`." - }, - { - "name": "prediction_dim", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dimension of a raw prediction, i.e. the number of trees built at each iteration. Equals 1 for regression and binary classification, or K where K is the number of classes for multiclass classification." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights of training data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return initial gradients and hessians.\n\nUnless hessians are constant, arrays are initialized with undefined\nvalues.\n\nParameters\n----------\nn_samples : int\n The number of samples passed to `fit()`.\n\nprediction_dim : int\n The dimension of a raw prediction, i.e. the number of trees\n built at each iteration. Equals 1 for regression and binary\n classification, or K where K is the number of classes for\n multiclass classification.\n\nsample_weight : array-like of shape(n_samples,) default=None\n Weights of training data.\n\nReturns\n-------\ngradients : ndarray, shape (prediction_dim, n_samples)\n The initial gradients. The array is not initialized.\nhessians : ndarray, shape (prediction_dim, n_samples)\n If hessians are constant (e.g. for `LeastSquares` loss, the\n array is initialized to ``1``. Otherwise, the array is allocated\n without being initialized." - }, - { - "name": "get_baseline_prediction", - "decorators": [], - "parameters": [ - { - "name": "y_train", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target training values." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights of training data." - }, - { - "name": "prediction_dim", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dimension of one prediction: 1 for binary classification and regression, n_classes for multiclass classification." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return initial predictions (before the first iteration).\n\nParameters\n----------\ny_train : ndarray, shape (n_samples,)\n The target training values.\n\nsample_weight : array-like of shape(n_samples,) default=None\n Weights of training data.\n\nprediction_dim : int\n The dimension of one prediction: 1 for binary classification and\n regression, n_classes for multiclass classification.\n\nReturns\n-------\nbaseline_prediction : float or ndarray, shape (1, prediction_dim)\n The baseline prediction." - }, - { - "name": "update_gradients_and_hessians", - "decorators": [], - "parameters": [ - { - "name": "gradients", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The gradients (treated as OUT array)." - }, - { - "name": "hessians", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The hessians (treated as OUT array)." - }, - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The true target values or each training sample." - }, - { - "name": "raw_predictions", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raw_predictions (i.e. values from the trees) of the tree ensemble at iteration ``i - 1``." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights of training data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update gradients and hessians arrays, inplace.\n\nThe gradients (resp. hessians) are the first (resp. second) order\nderivatives of the loss for each sample with respect to the\npredictions of model, evaluated at iteration ``i - 1``.\n\nParameters\n----------\ngradients : ndarray, shape (prediction_dim, n_samples)\n The gradients (treated as OUT array).\n\nhessians : ndarray, shape (prediction_dim, n_samples) or (1,)\n The hessians (treated as OUT array).\n\ny_true : ndarray, shape (n_samples,)\n The true target values or each training sample.\n\nraw_predictions : ndarray, shape (prediction_dim, n_samples)\n The raw_predictions (i.e. values from the trees) of the tree\n ensemble at iteration ``i - 1``.\n\nsample_weight : array-like of shape(n_samples,) default=None\n Weights of training data." - } - ], - "docstring": "Base class for a loss." 
- }, - { - "name": "LeastSquares", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "pointwise_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_baseline_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_link_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update_gradients_and_hessians", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Least squares loss, for regression.\n\nFor a given sample x_i, least squares loss is defined as::\n\n loss(x_i) = 0.5 * (y_true_i - raw_pred_i)**2\n\nThis actually computes the half least squares loss to simplify\nthe computation of the gradients and get a unit hessian (and be consistent\nwith what is done in LightGBM)." 
- }, - { - "name": "LeastAbsoluteDeviation", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "pointwise_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_baseline_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_link_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update_gradients_and_hessians", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update_leaves_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Least absolute deviation, for regression.\n\nFor a given sample x_i, the loss is defined as::\n\n loss(x_i) = |y_true_i - raw_pred_i|" - }, - { - "name": "Poisson", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "pointwise_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_baseline_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update_gradients_and_hessians", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Poisson deviance loss with log-link, for regression.\n\nFor a given sample x_i, Poisson deviance loss is defined as::\n\n 
loss(x_i) = y_true_i * log(y_true_i/exp(raw_pred_i))\n - y_true_i + exp(raw_pred_i))\n\nThis actually computes half the Poisson deviance to simplify\nthe computation of the gradients." - }, - { - "name": "BinaryCrossEntropy", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "pointwise_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_baseline_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update_gradients_and_hessians", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Binary cross-entropy loss, for binary classification.\n\nFor a given sample x_i, the binary cross-entropy loss is defined as the\nnegative log-likelihood of the model which can be expressed as::\n\n loss(x_i) = log(1 + exp(raw_pred_i)) - y_true_i * raw_pred_i\n\nSee The Elements of Statistical Learning, by Hastie, Tibshirani, Friedman,\nsection 4.4.1 (about logistic regression)." 
- }, - { - "name": "CategoricalCrossEntropy", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "pointwise_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_baseline_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update_gradients_and_hessians", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Categorical cross-entropy loss, for multiclass classification.\n\nFor a given sample x_i, the categorical cross-entropy loss is defined as\nthe negative log-likelihood of the model and generalizes the binary\ncross-entropy to more than 2 classes." - } - ], - "functions": [] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.predictor", - "imports": [ - "import numpy as np", - "from common import Y_DTYPE", - "from _predictor import _predict_from_raw_data", - "from _predictor import _predict_from_binned_data", - "from _predictor import _compute_partial_dependence" - ], - "classes": [ - { - "name": "TreePredictor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "nodes", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The nodes of the tree." 
- }, - { - "name": "binned_left_cat_bitsets", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of bitsets for binned categories used in predict_binned when a split is categorical." - }, - { - "name": "raw_left_cat_bitsets", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of bitsets for raw categories used in predict when a split is categorical." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_n_leaf_nodes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return number of leaves." - }, - { - "name": "get_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return maximum depth among all leaves." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - }, - { - "name": "known_cat_bitsets", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of bitsets of known categories, for each categorical feature." - }, - { - "name": "f_idx_map", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Map from original feature index to the corresponding index in the known_cat_bitsets array." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict raw values for non-binned data.\n\nParameters\n----------\nX : ndarray, shape (n_samples, n_features)\n The input samples.\n\nknown_cat_bitsets : ndarray of shape (n_categorical_features, 8)\n Array of bitsets of known categories, for each categorical feature.\n\nf_idx_map : ndarray of shape (n_features,)\n Map from original feature index to the corresponding index in the\n known_cat_bitsets array.\n\nReturns\n-------\ny : ndarray, shape (n_samples,)\n The raw predicted values." - }, - { - "name": "predict_binned", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - }, - { - "name": "missing_values_bin_idx", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the bin that is used for missing values. This is the index of the last bin and is always equal to max_bins (as passed to the GBDT classes), or equivalently to n_bins - 1." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict raw values for binned data.\n\nParameters\n----------\nX : ndarray, shape (n_samples, n_features)\n The input samples.\nmissing_values_bin_idx : uint8\n Index of the bin that is used for missing values. This is the\n index of the last bin and is always equal to max_bins (as passed\n to the GBDT classes), or equivalently to n_bins - 1.\n\nReturns\n-------\ny : ndarray, shape (n_samples,)\n The raw predicted values." - }, - { - "name": "compute_partial_dependence", - "decorators": [], - "parameters": [ - { - "name": "grid", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The grid points on which the partial dependence should be evaluated." 
- }, - { - "name": "target_features", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of target features for which the partial dependence should be evaluated." - }, - { - "name": "out", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The value of the partial dependence function on each grid point." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fast partial dependence computation.\n\nParameters\n----------\ngrid : ndarray, shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\ntarget_features : ndarray, shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\nout : ndarray, shape (n_samples)\n The value of the partial dependence function on each grid\n point." - } - ], - "docstring": "Tree class used for predictions.\n\nParameters\n----------\nnodes : ndarray of PREDICTOR_RECORD_DTYPE\n The nodes of the tree.\nbinned_left_cat_bitsets : ndarray of shape (n_categorical_splits, 8), dtype=uint32\n Array of bitsets for binned categories used in predict_binned when a\n split is categorical.\nraw_left_cat_bitsets : ndarray of shape (n_categorical_splits, 8), dtype=uint32\n Array of bitsets for raw categories used in predict when a split is\n categorical." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_binning", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_allclose", - "import pytest", - "from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper", - "from sklearn.ensemble._hist_gradient_boosting.binning import _find_binning_thresholds", - "from sklearn.ensemble._hist_gradient_boosting.binning import _map_to_bins", - "from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import ALMOST_INF" - ], - "classes": [], - "functions": [ - { - "name": "test_find_binning_thresholds_regular_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_find_binning_thresholds_small_regular_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_find_binning_thresholds_random_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_find_binning_thresholds_low_n_bins", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_n_bins", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bin_mapper_n_features_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_map_to_bins", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bin_mapper_random_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bin_mapper_small_random_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bin_mapper_identity_repeated_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bin_mapper_repeated_values_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bin_mapper_identity_small", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bin_mapper_idempotence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_bins_non_missing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_subsample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_values_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_infinite_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categorical_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categorical_with_numerical_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_make_known_categories_bitsets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categorical_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_bitset", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_allclose", - "from sklearn.ensemble._hist_gradient_boosting._bitset import set_bitset_memoryview", - "from sklearn.ensemble._hist_gradient_boosting._bitset import in_bitset_memoryview", - "from sklearn.ensemble._hist_gradient_boosting._bitset import set_raw_bitset_from_binned_bitset", - "from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE" - ], - "classes": [], - "functions": [ - { - "name": "test_set_get_bitset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raw_bitset_from_binned_bitset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_compare_lightgbm", - "imports": [ - "from sklearn.model_selection import train_test_split", - "from sklearn.metrics import accuracy_score", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "import numpy as np", - "import pytest", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from sklearn.ensemble import HistGradientBoostingRegressor", - "from sklearn.ensemble import HistGradientBoostingClassifier", - "from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper", - "from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator" - ], - "classes": [], - "functions": [ - { - "name": 
"test_same_predictions_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_same_predictions_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_same_predictions_multiclass_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_gradient_boosting", - "imports": [ - "import numpy as np", - "import pytest", - "from numpy.testing import assert_allclose", - "from numpy.testing import assert_array_equal", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.datasets import make_low_rank_matrix", - "from sklearn.preprocessing import KBinsDiscretizer", - "from sklearn.preprocessing import MinMaxScaler", - "from sklearn.preprocessing import OneHotEncoder", - "from sklearn.model_selection import train_test_split", - "from sklearn.model_selection import cross_val_score", - "from sklearn.base import clone", - "from sklearn.base import BaseEstimator", - "from sklearn.base import TransformerMixin", - "from sklearn.base import is_regressor", - "from sklearn.pipeline import make_pipeline", - "from sklearn.metrics import mean_poisson_deviance", - "from sklearn.dummy import DummyRegressor", - "from sklearn.exceptions import NotFittedError", - "from sklearn.compose import make_column_transformer", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from sklearn.ensemble import HistGradientBoostingRegressor", - "from sklearn.ensemble import HistGradientBoostingClassifier", - "from sklearn.ensemble._hist_gradient_boosting.loss import _LOSSES", - "from sklearn.ensemble._hist_gradient_boosting.loss import LeastSquares", - "from 
sklearn.ensemble._hist_gradient_boosting.loss import BinaryCrossEntropy", - "from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower", - "from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper", - "from sklearn.utils import shuffle" - ], - "classes": [], - "functions": [ - { - "name": "_make_dumb_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make a dumb dataset to test early stopping." - }, - { - "name": "test_init_parameters_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_classification_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping_default", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_should_stop", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_least_absolute_deviation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_poisson_y_positive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_poisson", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_binning_train_validation_are_separated", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_values_trivial", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_values_resilience", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_division_hessians", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_small_trainset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_values_minmax_imputation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_infinite_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_consistent_lengths", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_infinite_values_missing_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_crossentropy_binary_problem", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_string_target_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_sample_weights_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_sample_weights_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight_effect", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sum_hessians_are_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_depth_max_leaf_nodes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping_on_test_set_with_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_single_node_trees", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_custom_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_staged_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unknown_categories_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categorical_encoding_strategies", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categorical_spec_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categorical_spec_no_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categorical_bad_encoding_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_uint8_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_grower", - "imports": [ - "import numpy as np", - "import pytest", - "from pytest import approx", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_allclose", - "from sklearn.preprocessing import OneHotEncoder", - "from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower", - "from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper", - "from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import Y_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import X_BITSET_INNER_DTYPE" - ], - "classes": [], - "functions": [ - { - "name": "_make_training_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_children_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grow_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predictor_from_grower", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_samples_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_samples_leaf_root", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_is_stump", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_parameters_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_value_predict_only", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_split_on_nan_with_infinite_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grow_tree_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ohe_equivalence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_histogram", - "imports": [ - "import numpy as np", - "import pytest", - "from numpy.testing import assert_allclose", - "from numpy.testing import assert_array_equal", - "from sklearn.ensemble._hist_gradient_boosting.histogram import _build_histogram_naive", - "from sklearn.ensemble._hist_gradient_boosting.histogram import _build_histogram", - "from sklearn.ensemble._hist_gradient_boosting.histogram import _build_histogram_no_hessian", - "from sklearn.ensemble._hist_gradient_boosting.histogram import _build_histogram_root_no_hessian", - "from sklearn.ensemble._hist_gradient_boosting.histogram import _build_histogram_root", - "from sklearn.ensemble._hist_gradient_boosting.histogram import 
_subtract_histograms", - "from sklearn.ensemble._hist_gradient_boosting.common import HISTOGRAM_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE" - ], - "classes": [], - "functions": [ - { - "name": "test_build_histogram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_histogram_sample_order_independence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unrolled_equivalent_to_naive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hist_subtraction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_loss", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_almost_equal", - "from numpy.testing import assert_allclose", - "from scipy.optimize import newton", - "from scipy.special import logit", - "from sklearn.utils import assert_all_finite", - "from sklearn.utils.fixes import sp_version", - "from sklearn.utils.fixes import parse_version", - "import pytest", - "from sklearn.ensemble._hist_gradient_boosting.loss import _LOSSES", - "from sklearn.ensemble._hist_gradient_boosting.common import Y_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE", - "from sklearn.utils._testing import skip_if_32bit" - ], - "classes": [], - "functions": [ - { - "name": "get_derivatives_helper", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return get_gradients() and get_hessians() functions for a given loss.\n " - }, - { - "name": "test_derivatives", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_numerical_gradients", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_baseline_least_squares", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_baseline_least_absolute_deviation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_baseline_poisson", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_baseline_binary_crossentropy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_baseline_categorical_crossentropy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight_multiplies_gradients", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_gradient_and_hessians_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_monotonic_contraints", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower", - "from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import MonotonicConstraint", - "from sklearn.ensemble._hist_gradient_boosting.splitting import Splitter", - "from 
sklearn.ensemble._hist_gradient_boosting.splitting import compute_node_value", - "from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from sklearn.ensemble import HistGradientBoostingRegressor", - "from sklearn.ensemble import HistGradientBoostingClassifier" - ], - "classes": [], - "functions": [ - { - "name": "is_increasing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "is_decreasing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_leaves_values_monotonic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_children_values_monotonic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_children_values_bounded", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nodes_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bounded_value_min_gain_to_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_predictor", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_allclose", - "from sklearn.datasets import 
make_regression", - "from sklearn.model_selection import train_test_split", - "from sklearn.metrics import r2_score", - "import pytest", - "from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper", - "from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower", - "from sklearn.ensemble._hist_gradient_boosting.predictor import TreePredictor", - "from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import PREDICTOR_RECORD_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import ALMOST_INF", - "from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import X_BITSET_INNER_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting._bitset import set_bitset_memoryview", - "from sklearn.ensemble._hist_gradient_boosting._bitset import set_raw_bitset_from_binned_bitset" - ], - "classes": [], - "functions": [ - { - "name": "test_regression_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_infinite_values_and_thresholds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categorical_predictor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_splitting", - "imports": [ - "import numpy as np", - "import pytest", - "from numpy.testing import assert_array_equal", - "from sklearn.ensemble._hist_gradient_boosting.common import HISTOGRAM_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE", - "from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE", - "from 
sklearn.ensemble._hist_gradient_boosting.common import MonotonicConstraint", - "from sklearn.ensemble._hist_gradient_boosting.splitting import Splitter", - "from sklearn.ensemble._hist_gradient_boosting.splitting import compute_node_value", - "from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder", - "from sklearn.utils._testing import skip_if_32bit" - ], - "classes": [], - "functions": [ - { - "name": "test_histogram_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_and_hessian_sanity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_split_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_gain_to_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_splitting_missing_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_splitting_categorical_cat_smooth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_assert_categories_equals_bitset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_splitting_categorical_sanity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests.test_warm_start", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_allclose", - "import pytest", - "from sklearn.base import clone", - "from 
sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from sklearn.ensemble import HistGradientBoostingRegressor", - "from sklearn.ensemble import HistGradientBoostingClassifier", - "from sklearn.metrics import check_scoring" - ], - "classes": [], - "functions": [ - { - "name": "_assert_predictor_equal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Assert that two HistGBM instances are identical." - }, - { - "name": "test_max_iter_with_warm_start_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_yields_identical_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_equal_n_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_clear", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_seeds_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.ensemble._hist_gradient_boosting.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.experimental.enable_halving_search_cv", - "imports": [ - "from model_selection._search_successive_halving import 
HalvingRandomSearchCV", - "from model_selection._search_successive_halving import HalvingGridSearchCV", - "from None import model_selection" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.experimental.enable_hist_gradient_boosting", - "imports": [ - "from ensemble._hist_gradient_boosting.gradient_boosting import HistGradientBoostingClassifier", - "from ensemble._hist_gradient_boosting.gradient_boosting import HistGradientBoostingRegressor", - "from None import ensemble" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.experimental.enable_iterative_imputer", - "imports": [ - "from impute._iterative import IterativeImputer", - "from None import impute" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.experimental", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.experimental.tests.test_enable_hist_gradient_boosting", - "imports": [ - "import textwrap", - "from sklearn.utils._testing import assert_run_python_script" - ], - "classes": [], - "functions": [ - { - "name": "test_imports_strategies", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.experimental.tests.test_enable_iterative_imputer", - "imports": [ - "import textwrap", - "from sklearn.utils._testing import assert_run_python_script" - ], - "classes": [], - "functions": [ - { - "name": "test_imports_strategies", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.experimental.tests.test_enable_successive_halving", - "imports": [ - "import textwrap", - "from sklearn.utils._testing import assert_run_python_script" - ], - "classes": [], - "functions": [ - { - "name": "test_imports_strategies", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": 
"sklearn.experimental.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.externals.conftest", - "imports": [], - "classes": [], - "functions": [ - { - "name": "pytest_ignore_collect", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.externals._arff", - "imports": [ - "from typing import TYPE_CHECKING", - "from typing import Optional", - "from typing import List", - "from typing import Dict", - "from typing import Any", - "from typing import Iterator", - "from typing import Union", - "from typing import Tuple", - "import re", - "import sys", - "import csv", - "from typing_extensions import TypedDict", - "from itertools import izip as zip" - ], - "classes": [ - { - "name": "ArffContainerType", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - }, - { - "name": "ArffException", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__str__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "BadRelationFormat", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Error raised when the relation declaration is in an invalid format." 
- }, - { - "name": "BadAttributeFormat", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Error raised when some attribute declaration is in an invalid format." - }, - { - "name": "BadDataFormat", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Error raised when some data instance is in an invalid format." - }, - { - "name": "BadAttributeType", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Error raised when some invalid type is provided into the attribute\ndeclaration." - }, - { - "name": "BadAttributeName", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Error raised when an attribute name is provided twice the attribute\ndeclaration." - }, - { - "name": "BadNominalValue", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Error raised when a value in used in some data instance but is not\ndeclared into it respective attribute declaration." 
- }, - { - "name": "BadNominalFormatting", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Error raised when a nominal value with space is not properly quoted." - }, - { - "name": "BadNumericalValue", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Error raised when and invalid numerical value is used in some data\ninstance." - }, - { - "name": "BadStringValue", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Error raise when a string contains space but is not quoted." - }, - { - "name": "BadLayout", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Error raised when the layout of the ARFF file has something wrong." - }, - { - "name": "BadObject", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__str__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Error raised when the object representing the ARFF file has something\nwrong." 
- }, - { - "name": "EncodedNominalConversor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NominalConversor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "DenseGeneratorData", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decode_rows", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_decode_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "encode_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "(INTERNAL) Encodes a line of data.\n\nData instances follow the csv format, i.e, attribute values are\ndelimited by commas. After converted from csv.\n\n:param data: a list of values.\n:param attributes: a list of attributes. Used to check if data is valid.\n:return: a string with the encoded data line." - } - ], - "docstring": "Internal helper class to allow for different matrix types without\nmaking the code a huge collection of if statements." 
- }, - { - "name": "_DataListMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decode_rows", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mixin to return a list from decode_rows instead of a generator" - }, - { - "name": "Data", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - }, - { - "name": "COOData", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decode_rows", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "encode_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "LODGeneratorData", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decode_rows", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "encode_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "LODData", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": "" - } - ], - "docstring": null - }, - { - "name": "ArffDecoder", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Constructor." - }, - { - "name": "_decode_comment", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "(INTERNAL) Decodes a comment line.\n\n Comments are single line strings starting, obligatorily, with the ``%``\n character, and can have any symbol, including whitespaces or special\n characters.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\r\n\" characters.\n\n :param s: a normalized string.\n :return: a string with the decoded comment.\n " - }, - { - "name": "_decode_relation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "(INTERNAL) Decodes a relation line.\n\n The relation declaration is a line with the format ``@RELATION\n ``, where ``relation-name`` is a string. The string must\n start with alphabetic character and must be quoted if the name includes\n spaces, otherwise this method will raise a `BadRelationFormat` exception.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\r\n\" characters.\n\n :param s: a normalized string.\n :return: a string with the decoded relation name.\n " - }, - { - "name": "_decode_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "(INTERNAL) Decodes an attribute line.\n\n The attribute is the most complex declaration in an arff file. 
All\n attributes must follow the template::\n\n @attribute \n\n where ``attribute-name`` is a string, quoted if the name contains any\n whitespace, and ``datatype`` can be:\n\n - Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n - Strings as ``STRING``.\n - Dates (NOT IMPLEMENTED).\n - Nominal attributes with format:\n\n {, , , ...}\n\n The nominal names follow the rules for the attribute names, i.e., they\n must be quoted if the name contains whitespaces.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\r\n\" characters.\n\n :param s: a normalized string.\n :return: a tuple (ATTRIBUTE_NAME, TYPE_OR_VALUES).\n " - }, - { - "name": "_decode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Do the job the ``encode``." - }, - { - "name": "decode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the Python representation of a given ARFF file.\n\nWhen a file object is passed as an argument, this method reads lines\niteratively, avoiding to load unnecessary information to the memory.\n\n:param s: a string or file object with the ARFF file.\n:param encode_nominal: boolean, if True perform a label encoding\n while reading the .arff file.\n:param return_type: determines the data structure used to store the\n dataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n `arff.DENSE_GEN` or `arff.LOD_GEN`.\n Consult the sections on `working with sparse data`_ and `loading\n progressively`_." - } - ], - "docstring": "An ARFF decoder." 
- }, - { - "name": "ArffEncoder", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_encode_comment", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "(INTERNAL) Encodes a comment line.\n\nComments are single line strings starting, obligatorily, with the ``%``\ncharacter, and can have any symbol, including whitespaces or special\ncharacters.\n\nIf ``s`` is None, this method will simply return an empty comment.\n\n:param s: (OPTIONAL) string.\n:return: a string with the encoded comment line." - }, - { - "name": "_encode_relation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "(INTERNAL) Decodes a relation line.\n\nThe relation declaration is a line with the format ``@RELATION\n``, where ``relation-name`` is a string.\n\n:param name: a string.\n:return: a string with the encoded relation declaration." - }, - { - "name": "_encode_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "(INTERNAL) Encodes an attribute line.\n\nThe attribute follow the template::\n\n @attribute \n\nwhere ``attribute-name`` is a string, and ``datatype`` can be:\n\n- Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n- Strings as ``STRING``.\n- Dates (NOT IMPLEMENTED).\n- Nominal attributes with format:\n\n {, , , ...}\n\nThis method must receive a the name of the attribute and its type, if\nthe attribute type is nominal, ``type`` must be a list of values.\n\n:param name: a string.\n:param type_: a string or a list of string.\n:return: a string with the encoded attribute declaration." 
- }, - { - "name": "encode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Encodes a given object to an ARFF file.\n\n:param obj: the object containing the ARFF information.\n:return: the ARFF file as an unicode string." - }, - { - "name": "iter_encode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The iterative version of `arff.ArffEncoder.encode`.\n\nThis encodes iteratively a given object and return, one-by-one, the\nlines of the ARFF file.\n\n:param obj: the object containing the ARFF information.\n:return: (yields) the ARFF file as unicode strings." - } - ], - "docstring": "An ARFF encoder." - } - ], - "functions": [ - { - "name": "_build_re_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_escape_sub_callback", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_unquote", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_parse_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "(INTERNAL) Split a line into a list of values" - }, - { - "name": "_unescape_sub_callback", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "encode_string", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_data_object_for_decoding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_data_object_for_encoding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - 
{ - "name": "load", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Load a file-like object containing the ARFF document and convert it into\na Python object.\n\n:param fp: a file-like object.\n:param encode_nominal: boolean, if True perform a label encoding\n while reading the .arff file.\n:param return_type: determines the data structure used to store the\n dataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n `arff.DENSE_GEN` or `arff.LOD_GEN`.\n Consult the sections on `working with sparse data`_ and `loading\n progressively`_.\n:return: a dictionary.\n " - }, - { - "name": "loads", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convert a string instance containing the ARFF document into a Python\nobject.\n\n:param s: a string object.\n:param encode_nominal: boolean, if True perform a label encoding\n while reading the .arff file.\n:param return_type: determines the data structure used to store the\n dataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n `arff.DENSE_GEN` or `arff.LOD_GEN`.\n Consult the sections on `working with sparse data`_ and `loading\n progressively`_.\n:return: a dictionary." - }, - { - "name": "dump", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Serialize an object representing the ARFF document to a given file-like\nobject.\n\n:param obj: a dictionary.\n:param fp: a file-like object." - }, - { - "name": "dumps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Serialize an object representing the ARFF document, returning a string.\n\n:param obj: a dictionary.\n:return: a string with the ARFF document." 
- } - ] - }, - { - "name": "sklearn.externals._lobpcg", - "imports": [ - "from __future__ import division", - "from __future__ import print_function", - "from __future__ import absolute_import", - "import numpy as np", - "from scipy.linalg import inv", - "from scipy.linalg import eigh", - "from scipy.linalg import cho_factor", - "from scipy.linalg import cho_solve", - "from scipy.linalg import cholesky", - "from scipy.linalg import orth", - "from scipy.linalg import LinAlgError", - "from scipy.sparse.linalg import aslinearoperator", - "import warnings", - "from scipy.linalg import norm" - ], - "classes": [], - "functions": [ - { - "name": "bmat", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_save", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_report_nonhermitian", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Report if `M` is not a hermitian matrix given its type." - }, - { - "name": "_as2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "If the input array is 2D return it, if it is 1D, append a dimension,\nmaking it a column vector." - }, - { - "name": "_makeOperator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Takes a dense numpy array or a sparse matrix or\na function and makes an operator performing matrix * blockvector\nproducts." - }, - { - "name": "_applyConstraints", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Changes blockVectorV in place." - }, - { - "name": "_b_orthonormalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "B-orthonormalize the given block vector using Cholesky." 
- }, - { - "name": "_get_indx", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get `num` indices into `_lambda` depending on `largest` option." - }, - { - "name": "lobpcg", - "decorators": [], - "parameters": [ - { - "name": "A", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The symmetric linear operator of the problem, usually a sparse matrix. Often called the \"stiffness matrix\"." - }, - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial approximation to the ``k`` eigenvectors (non-sparse). If `A` has ``shape=(n,n)`` then `X` should have shape ``shape=(n,k)``." - }, - { - "name": "B", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The right hand side operator in a generalized eigenproblem. By default, ``B = Identity``. Often called the \"mass matrix\"." - }, - { - "name": "M", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Preconditioner to `A`; by default ``M = Identity``. `M` should approximate the inverse of `A`." - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "n-by-sizeY matrix of constraints (non-sparse), sizeY < n The iterations will be performed in the B-orthogonal complement of the column-space of Y. Y must be full rank." - }, - { - "name": "tol", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Solver tolerance (stopping criterion). The default is ``tol=n*sqrt(eps)``." 
- }, - { - "name": "maxiter", - "type": "Optional[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations. The default is ``maxiter=min(n, 20)``." - }, - { - "name": "largest", - "type": "Optional[bool]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "When True, solve for the largest eigenvalues, otherwise the smallest." - }, - { - "name": "verbosityLevel", - "type": "Optional[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls solver output. The default is ``verbosityLevel=0``." - }, - { - "name": "retLambdaHistory", - "type": "Optional[bool]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to return eigenvalue history. Default is False." - }, - { - "name": "retResidualNormsHistory", - "type": "Optional[bool]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to return history of residual norms. Default is False." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Locally Optimal Block Preconditioned Conjugate Gradient Method (LOBPCG)\n\nLOBPCG is a preconditioned eigensolver for large symmetric positive\ndefinite (SPD) generalized eigenproblems.\n\nParameters\n----------\nA : {sparse matrix, dense matrix, LinearOperator}\n The symmetric linear operator of the problem, usually a\n sparse matrix. Often called the \"stiffness matrix\".\nX : ndarray, float32 or float64\n Initial approximation to the ``k`` eigenvectors (non-sparse). If `A`\n has ``shape=(n,n)`` then `X` should have shape ``shape=(n,k)``.\nB : {dense matrix, sparse matrix, LinearOperator}, optional\n The right hand side operator in a generalized eigenproblem.\n By default, ``B = Identity``. 
Often called the \"mass matrix\".\nM : {dense matrix, sparse matrix, LinearOperator}, optional\n Preconditioner to `A`; by default ``M = Identity``.\n `M` should approximate the inverse of `A`.\nY : ndarray, float32 or float64, optional\n n-by-sizeY matrix of constraints (non-sparse), sizeY < n\n The iterations will be performed in the B-orthogonal complement\n of the column-space of Y. Y must be full rank.\ntol : scalar, optional\n Solver tolerance (stopping criterion).\n The default is ``tol=n*sqrt(eps)``.\nmaxiter : int, optional\n Maximum number of iterations. The default is ``maxiter=min(n, 20)``.\nlargest : bool, optional\n When True, solve for the largest eigenvalues, otherwise the smallest.\nverbosityLevel : int, optional\n Controls solver output. The default is ``verbosityLevel=0``.\nretLambdaHistory : bool, optional\n Whether to return eigenvalue history. Default is False.\nretResidualNormsHistory : bool, optional\n Whether to return history of residual norms. Default is False.\n\nReturns\n-------\nw : ndarray\n Array of ``k`` eigenvalues\nv : ndarray\n An array of ``k`` eigenvectors. `v` has the same shape as `X`.\nlambdas : list of ndarray, optional\n The eigenvalue history, if `retLambdaHistory` is True.\nrnorms : list of ndarray, optional\n The history of residual norms, if `retResidualNormsHistory` is True.\n\nNotes\n-----\nIf both ``retLambdaHistory`` and ``retResidualNormsHistory`` are True,\nthe return tuple has the following format\n``(lambda, V, lambda history, residual norms history)``.\n\nIn the following ``n`` denotes the matrix size and ``m`` the number\nof required eigenvalues (smallest or largest).\n\nThe LOBPCG code internally solves eigenproblems of the size ``3m`` on every\niteration by calling the \"standard\" dense eigensolver, so if ``m`` is not\nsmall enough compared to ``n``, it does not make sense to call the LOBPCG\ncode, but rather one should use the \"standard\" eigensolver, e.g. 
numpy or\nscipy function in this case.\nIf one calls the LOBPCG algorithm for ``5m > n``, it will most likely break\ninternally, so the code tries to call the standard function instead.\n\nIt is not that ``n`` should be large for the LOBPCG to work, but rather the\nratio ``n / m`` should be large. It you call LOBPCG with ``m=1``\nand ``n=10``, it works though ``n`` is small. The method is intended\nfor extremely large ``n / m``, see e.g., reference [28] in\nhttps://arxiv.org/abs/0705.2626\n\nThe convergence speed depends basically on two factors:\n\n1. How well relatively separated the seeking eigenvalues are from the rest\n of the eigenvalues. One can try to vary ``m`` to make this better.\n\n2. How well conditioned the problem is. This can be changed by using proper\n preconditioning. For example, a rod vibration test problem (under tests\n directory) is ill-conditioned for large ``n``, so convergence will be\n slow, unless efficient preconditioning is used. For this specific\n problem, a good simple preconditioner function would be a linear solve\n for `A`, which is easy to code since A is tridiagonal.\n\nReferences\n----------\n.. [1] A. V. Knyazev (2001),\n Toward the Optimal Preconditioned Eigensolver: Locally Optimal\n Block Preconditioned Conjugate Gradient Method.\n SIAM Journal on Scientific Computing 23, no. 2,\n pp. 517-541. http://dx.doi.org/10.1137/S1064827500366124\n\n.. [2] A. V. Knyazev, I. Lashuk, M. E. Argentati, and E. Ovchinnikov\n (2007), Block Locally Optimal Preconditioned Eigenvalue Xolvers\n (BLOPEX) in hypre and PETSc. https://arxiv.org/abs/0705.2626\n\n.. [3] A. V. 
Knyazev's C and MATLAB implementations:\n https://bitbucket.org/joseroman/blopex\n\nExamples\n--------\n\nSolve ``A x = lambda x`` with constraints and preconditioning.\n\n>>> import numpy as np\n>>> from scipy.sparse import spdiags, issparse\n>>> from scipy.sparse.linalg import lobpcg, LinearOperator\n>>> n = 100\n>>> vals = np.arange(1, n + 1)\n>>> A = spdiags(vals, 0, n, n)\n>>> A.toarray()\narray([[ 1., 0., 0., ..., 0., 0., 0.],\n [ 0., 2., 0., ..., 0., 0., 0.],\n [ 0., 0., 3., ..., 0., 0., 0.],\n ...,\n [ 0., 0., 0., ..., 98., 0., 0.],\n [ 0., 0., 0., ..., 0., 99., 0.],\n [ 0., 0., 0., ..., 0., 0., 100.]])\n\nConstraints:\n\n>>> Y = np.eye(n, 3)\n\nInitial guess for eigenvectors, should have linearly independent\ncolumns. Column dimension = number of requested eigenvalues.\n\n>>> X = np.random.rand(n, 3)\n\nPreconditioner in the inverse of A in this example:\n\n>>> invA = spdiags([1./vals], 0, n, n)\n\nThe preconditiner must be defined by a function:\n\n>>> def precond( x ):\n... return invA @ x\n\nThe argument x of the preconditioner function is a matrix inside `lobpcg`,\nthus the use of matrix-matrix product ``@``.\n\nThe preconditioner function is passed to lobpcg as a `LinearOperator`:\n\n>>> M = LinearOperator(matvec=precond, matmat=precond,\n... shape=(n, n), dtype=float)\n\nLet us now solve the eigenvalue problem for the matrix A:\n\n>>> eigenvalues, _ = lobpcg(A, X, Y=Y, M=M, largest=False)\n>>> eigenvalues\narray([4., 5., 6.])\n\nNote that the vectors passed in Y are the eigenvectors of the 3 smallest\neigenvalues. The results returned are orthogonal to those." 
- } - ] - }, - { - "name": "sklearn.externals._pep562", - "imports": [ - "from __future__ import unicode_literals", - "import sys" - ], - "classes": [ - { - "name": "Pep562", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Acquire `__getattr__` and `__dir__`, but only replace module for versions less than Python 3.7." - }, - { - "name": "__dir__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the overridden `dir` if one was provided, else apply `dir` to the module." - }, - { - "name": "__getattr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Attempt to retrieve the attribute from the module, and if missing, use the overridden function if present." - } - ], - "docstring": "Backport of PEP 562 .\n\nWraps the module in a class that exposes the mechanics to override `__dir__` and `__getattr__`.\nThe given module will be searched for overrides of `__dir__` and `__getattr__` and use them when needed." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.externals._pilutil", - "imports": [ - "from __future__ import division", - "from __future__ import print_function", - "from __future__ import absolute_import", - "import numpy", - "from numpy import amin", - "from numpy import amax", - "from numpy import ravel", - "from numpy import asarray", - "from numpy import arange", - "from numpy import ones", - "from numpy import newaxis", - "from numpy import transpose", - "from numpy import iscomplexobj", - "from numpy import uint8", - "from numpy import issubdtype", - "from numpy import array", - "from PIL import Image", - "import Image" - ], - "classes": [], - "functions": [ - { - "name": "bytescale", - "decorators": [], - "parameters": [ - { - "name": "data", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "PIL image data array." - }, - { - "name": "cmin", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Bias scaling of small values. Default is ``data.min()``." - }, - { - "name": "cmax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Bias scaling of large values. Default is ``data.max()``." - }, - { - "name": "high", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Scale max value to `high`. Default is 255." - }, - { - "name": "low", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Scale min value to `low`. Default is 0." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Byte scales an array (image).\n\nByte scaling means converting the input image to uint8 dtype and scaling\nthe range to ``(low, high)`` (default 0-255).\nIf the input image already has dtype uint8, no scaling is done.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\nParameters\n----------\ndata : ndarray\n PIL image data array.\ncmin : scalar, default=None\n Bias scaling of small values. Default is ``data.min()``.\ncmax : scalar, default=None\n Bias scaling of large values. Default is ``data.max()``.\nhigh : scalar, default=None\n Scale max value to `high`. Default is 255.\nlow : scalar, default=None\n Scale min value to `low`. Default is 0.\n\nReturns\n-------\nimg_array : uint8 ndarray\n The byte-scaled array.\n\nExamples\n--------\n>>> import numpy as np\n>>> from scipy.misc import bytescale\n>>> img = np.array([[ 91.06794177, 3.39058326, 84.4221549 ],\n... [ 73.88003259, 80.91433048, 4.88878881],\n... [ 51.53875334, 34.45808177, 27.5873488 ]])\n>>> bytescale(img)\narray([[255, 0, 236],\n [205, 225, 4],\n [140, 90, 70]], dtype=uint8)\n>>> bytescale(img, high=200, low=100)\narray([[200, 100, 192],\n [180, 188, 102],\n [155, 135, 128]], dtype=uint8)\n>>> bytescale(img, cmin=0, cmax=255)\narray([[91, 3, 84],\n [74, 81, 5],\n [52, 34, 28]], dtype=uint8)" - }, - { - "name": "imread", - "decorators": [], - "parameters": [ - { - "name": "name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The file name or file object to be read." - }, - { - "name": "flatten", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, flattens the color layers into a single gray-scale layer." 
- }, - { - "name": "mode", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Mode to convert image to, e.g. ``'RGB'``. See the Notes for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Read an image from a file as an array.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\nParameters\n----------\nname : str or file object\n The file name or file object to be read.\nflatten : bool, default=False\n If True, flattens the color layers into a single gray-scale layer.\nmode : str, default=None\n Mode to convert image to, e.g. ``'RGB'``. See the Notes for more\n details.\n\nReturns\n-------\nimread : ndarray\n The array obtained by reading the image.\n\nNotes\n-----\n`imread` uses the Python Imaging Library (PIL) to read an image.\nThe following notes are from the PIL documentation.\n\n`mode` can be one of the following strings:\n\n* 'L' (8-bit pixels, black and white)\n* 'P' (8-bit pixels, mapped to any other mode using a color palette)\n* 'RGB' (3x8-bit pixels, true color)\n* 'RGBA' (4x8-bit pixels, true color with transparency mask)\n* 'CMYK' (4x8-bit pixels, color separation)\n* 'YCbCr' (3x8-bit pixels, color video format)\n* 'I' (32-bit signed integer pixels)\n* 'F' (32-bit floating point pixels)\n\nPIL also provides limited support for a few special modes, including\n'LA' ('L' with alpha), 'RGBX' (true color with padding) and 'RGBa'\n(true color with premultiplied alpha).\n\nWhen translating a color image to black and white (mode 'L', 'I' or\n'F'), the library uses the ITU-R 601-2 luma transform::\n\n L = R * 299/1000 + G * 587/1000 + B * 114/1000\n\nWhen `flatten` is True, the image is converted using mode 'F'.\nWhen `mode` is not None and `flatten` is True, the image is first\nconverted according to `mode`, and the result is then flattened using\nmode 'F'." 
- }, - { - "name": "imsave", - "decorators": [], - "parameters": [ - { - "name": "name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Output file name or file object." - }, - { - "name": "arr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array containing image values. If the shape is ``MxN``, the array represents a grey-level image. Shape ``MxNx3`` stores the red, green and blue bands along the last dimension. An alpha layer may be included, specified as the last colour band of an ``MxNx4`` array." - }, - { - "name": "format", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Image format. If omitted, the format to use is determined from the file name extension. If a file object was used instead of a file name, this parameter should always be used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Save an array as an image.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\n.. warning::\n\n This function uses `bytescale` under the hood to rescale images to use\n the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.\n It will also cast data for 2-D images to ``uint32`` for ``mode=None``\n (which is the default).\n\nParameters\n----------\nname : str or file object\n Output file name or file object.\narr : ndarray, MxN or MxNx3 or MxNx4\n Array containing image values. If the shape is ``MxN``, the array\n represents a grey-level image. Shape ``MxNx3`` stores the red, green\n and blue bands along the last dimension. An alpha layer may be\n included, specified as the last colour band of an ``MxNx4`` array.\nformat : str, default=None\n Image format. If omitted, the format to use is determined from the\n file name extension. 
If a file object was used instead of a file name,\n this parameter should always be used.\n\nExamples\n--------\nConstruct an array of gradient intensity values and save to file:\n\n>>> import numpy as np\n>>> from scipy.misc import imsave\n>>> x = np.zeros((255, 255))\n>>> x = np.zeros((255, 255), dtype=np.uint8)\n>>> x[:] = np.arange(255)\n>>> imsave('gradient.png', x)\n\nConstruct an array with three colour bands (R, G, B) and store to file:\n\n>>> rgb = np.zeros((255, 255, 3), dtype=np.uint8)\n>>> rgb[..., 0] = np.arange(255)\n>>> rgb[..., 1] = 55\n>>> rgb[..., 2] = 1 - np.arange(255)\n>>> imsave('rgb_gradient.png', rgb)" - }, - { - "name": "fromimage", - "decorators": [], - "parameters": [ - { - "name": "im", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input image." - }, - { - "name": "flatten", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, convert the output to grey-scale." - }, - { - "name": "mode", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Mode to convert image to, e.g. ``'RGB'``. See the Notes of the `imread` docstring for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return a copy of a PIL image as a numpy array.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\nParameters\n----------\nim : PIL image\n Input image.\nflatten : bool, default=False\n If true, convert the output to grey-scale.\nmode : str, default=None\n Mode to convert image to, e.g. ``'RGB'``. See the Notes of the\n `imread` docstring for more details.\n\nReturns\n-------\nfromimage : ndarray\n The different colour bands/channels are stored in the\n third dimension, such that a grey-image is MxN, an\n RGB-image MxNx3 and an RGBA-image MxNx4." 
- }, - { - "name": "toimage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Takes a numpy array and returns a PIL image.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\nThe mode of the PIL image depends on the array shape and the `pal` and\n`mode` keywords.\n\nFor 2-D arrays, if `pal` is a valid (N,3) byte-array giving the RGB values\n(from 0 to 255) then ``mode='P'``, otherwise ``mode='L'``, unless mode\nis given as 'F' or 'I' in which case a float and/or integer array is made.\n\n.. warning::\n\n This function uses `bytescale` under the hood to rescale images to use\n the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.\n It will also cast data for 2-D images to ``uint32`` for ``mode=None``\n (which is the default).\n\nNotes\n-----\nFor 3-D arrays, the `channel_axis` argument tells which dimension of the\narray holds the channel data.\n\nFor 3-D arrays if one of the dimensions is 3, the mode is 'RGB'\nby default or 'YCbCr' if selected.\n\nThe numpy array must be either 2 dimensional or 3 dimensional." - }, - { - "name": "imresize", - "decorators": [], - "parameters": [ - { - "name": "arr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The array of image to be resized." - }, - { - "name": "size", - "type": "Union[float, int, Tuple[]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "* int - Percentage of current size. * float - Fraction of current size. * tuple - Size of the output image (height, width)." - }, - { - "name": "interp", - "type": "str", - "hasDefault": true, - "default": "'bilinear'", - "limitation": null, - "ignored": false, - "docstring": "Interpolation to use for re-sizing ('nearest', 'lanczos', 'bilinear', 'bicubic' or 'cubic')." 
- }, - { - "name": "mode", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The PIL image mode ('P', 'L', etc.) to convert `arr` before resizing. If ``mode=None`` (the default), 2-D images will be treated like ``mode='L'``, i.e. casting to long integer. For 3-D and 4-D arrays, `mode` will be set to ``'RGB'`` and ``'RGBA'`` respectively." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Resize an image.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\n.. warning::\n\n This function uses `bytescale` under the hood to rescale images to use\n the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.\n It will also cast data for 2-D images to ``uint32`` for ``mode=None``\n (which is the default).\n\nParameters\n----------\narr : ndarray\n The array of image to be resized.\nsize : int, float or tuple\n * int - Percentage of current size.\n * float - Fraction of current size.\n * tuple - Size of the output image (height, width).\n\ninterp : str, default='bilinear'\n Interpolation to use for re-sizing ('nearest', 'lanczos', 'bilinear',\n 'bicubic' or 'cubic').\nmode : str, default=None\n The PIL image mode ('P', 'L', etc.) to convert `arr` before resizing.\n If ``mode=None`` (the default), 2-D images will be treated like\n ``mode='L'``, i.e. casting to long integer. For 3-D and 4-D arrays,\n `mode` will be set to ``'RGB'`` and ``'RGBA'`` respectively.\n\nReturns\n-------\nimresize : ndarray\n The resized array of image.\n\nSee Also\n--------\ntoimage : Implicitly used to convert `arr` according to `mode`.\nscipy.ndimage.zoom : More generic implementation that does not use PIL." 
- } - ] - }, - { - "name": "sklearn.externals", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.feature_extraction.image", - "imports": [ - "from itertools import product", - "import numbers", - "import numpy as np", - "from scipy import sparse", - "from numpy.lib.stride_tricks import as_strided", - "from utils import check_array", - "from utils import check_random_state", - "from utils.validation import _deprecate_positional_args", - "from base import BaseEstimator" - ], - "classes": [ - { - "name": "PatchExtractor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "patch_size", - "type": "Tuple[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dimensions of one patch." - }, - { - "name": "max_patches", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of patches per image to extract. If max_patches is a float in (0, 1), it is taken to mean a proportion of the total number of patches." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator used for random sampling when `max_patches` is not None. Use an int to make the randomness deterministic. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of images from which to extract patches. For color images, the last dimension specifies the channel: a RGB image would have `n_channels=3`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transforms the image samples in X into a matrix of patch data.\n\nParameters\n----------\nX : ndarray of shape (n_samples, image_height, image_width) or (n_samples, image_height, image_width, n_channels)\n Array of images from which to extract patches. For color images,\n the last dimension specifies the channel: a RGB image would have\n `n_channels=3`.\n\nReturns\n-------\npatches : array of shape (n_patches, patch_height, patch_width) or (n_patches, patch_height, patch_width, n_channels)\n The collection of patches extracted from the images, where\n `n_patches` is either `n_samples * max_patches` or the total\n number of patches that can be extracted." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Extracts patches from a collection of images\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.9\n\nParameters\n----------\npatch_size : tuple of int (patch_height, patch_width), default=None\n The dimensions of one patch.\n\nmax_patches : int or float, default=None\n The maximum number of patches per image to extract. 
If max_patches is a\n float in (0, 1), it is taken to mean a proportion of the total number\n of patches.\n\nrandom_state : int, RandomState instance, default=None\n Determines the random number generator used for random sampling when\n `max_patches` is not None. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> from sklearn.datasets import load_sample_images\n>>> from sklearn.feature_extraction import image\n>>> # Use the array data from the second image in this dataset:\n>>> X = load_sample_images().images[1]\n>>> print('Image shape: {}'.format(X.shape))\nImage shape: (427, 640, 3)\n>>> pe = image.PatchExtractor(patch_size=(2, 2))\n>>> pe_fit = pe.fit(X)\n>>> pe_trans = pe.transform(X)\n>>> print('Patches shape: {}'.format(pe_trans.shape))\nPatches shape: (545706, 2, 2)" - } - ], - "functions": [ - { - "name": "_make_edges_3d", - "decorators": [], - "parameters": [ - { - "name": "n_x", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The size of the grid in the x direction." - }, - { - "name": "n_y", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The size of the grid in the y direction." 
- }, - { - "name": "n_z", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The size of the grid in the z direction, defaults to 1" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns a list of edges for a 3D image.\n\nParameters\n----------\nn_x : int\n The size of the grid in the x direction.\nn_y : int\n The size of the grid in the y direction.\nn_z : integer, default=1\n The size of the grid in the z direction, defaults to 1" - }, - { - "name": "_compute_gradient_3d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_mask_edges_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply a mask to edges (weighted or not)" - }, - { - "name": "_to_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Auxiliary function for img_to_graph and grid_to_graph\n " - }, - { - "name": "img_to_graph", - "decorators": [], - "parameters": [ - { - "name": "img", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "2D or 3D image." - }, - { - "name": "mask", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An optional mask of the image, to consider only part of the pixels." - }, - { - "name": "return_as", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class to use to build the returned adjacency matrix." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data of the returned sparse matrix. 
By default it is the dtype of img" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Graph of the pixel-to-pixel gradient connections\n\nEdges are weighted with the gradient values.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nimg : ndarray of shape (height, width) or (height, width, channel)\n 2D or 3D image.\nmask : ndarray of shape (height, width) or (height, width, channel), dtype=bool, default=None\n An optional mask of the image, to consider only part of the\n pixels.\nreturn_as : np.ndarray or a sparse matrix class, default=sparse.coo_matrix\n The class to use to build the returned adjacency matrix.\ndtype : dtype, default=None\n The data of the returned sparse matrix. By default it is the\n dtype of img\n\nNotes\n-----\nFor scikit-learn versions 0.14.1 and prior, return_as=np.ndarray was\nhandled by returning a dense np.matrix instance. Going forward, np.ndarray\nreturns an np.ndarray, as expected.\n\nFor compatibility, user code relying on this method should wrap its\ncalls in ``np.asarray`` to avoid type issues." - }, - { - "name": "grid_to_graph", - "decorators": [], - "parameters": [ - { - "name": "n_x", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimension in x axis" - }, - { - "name": "n_y", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimension in y axis" - }, - { - "name": "n_z", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Dimension in z axis" - }, - { - "name": "mask", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An optional mask of the image, to consider only part of the pixels." 
- }, - { - "name": "return_as", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class to use to build the returned adjacency matrix." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "int", - "limitation": null, - "ignored": false, - "docstring": "The data of the returned sparse matrix. By default it is int" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Graph of the pixel-to-pixel connections\n\nEdges exist if 2 voxels are connected.\n\nParameters\n----------\nn_x : int\n Dimension in x axis\nn_y : int\n Dimension in y axis\nn_z : int, default=1\n Dimension in z axis\nmask : ndarray of shape (n_x, n_y, n_z), dtype=bool, default=None\n An optional mask of the image, to consider only part of the\n pixels.\nreturn_as : np.ndarray or a sparse matrix class, default=sparse.coo_matrix\n The class to use to build the returned adjacency matrix.\ndtype : dtype, default=int\n The data of the returned sparse matrix. By default it is int\n\nNotes\n-----\nFor scikit-learn versions 0.14.1 and prior, return_as=np.ndarray was\nhandled by returning a dense np.matrix instance. Going forward, np.ndarray\nreturns an np.ndarray, as expected.\n\nFor compatibility, user code relying on this method should wrap its\ncalls in ``np.asarray`` to avoid type issues." 
- }, - { - "name": "_compute_n_patches", - "decorators": [], - "parameters": [ - { - "name": "i_h", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The image height" - }, - { - "name": "i_w", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The image with" - }, - { - "name": "p_h", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The height of a patch" - }, - { - "name": "p_w", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The width of a patch" - }, - { - "name": "max_patches", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of patches to extract. If max_patches is a float between 0 and 1, it is taken to be a proportion of the total number of patches." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the number of patches that will be extracted in an image.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ni_h : int\n The image height\ni_w : int\n The image with\np_h : int\n The height of a patch\np_w : int\n The width of a patch\nmax_patches : int or float, default=None\n The maximum number of patches to extract. If max_patches is a float\n between 0 and 1, it is taken to be a proportion of the total number\n of patches." 
- }, - { - "name": "_extract_patches", - "decorators": [], - "parameters": [ - { - "name": "arr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "n-dimensional array of which patches are to be extracted" - }, - { - "name": "patch_shape", - "type": "Union[Tuple[], int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates the shape of the patches to be extracted. If an integer is given, the shape will be a hypercube of sidelength given by its value." - }, - { - "name": "extraction_step", - "type": "Union[Tuple[], int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Indicates step size at which extraction shall be performed. If integer is given, then the step is uniform in all dimensions." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Extracts patches of any n-dimensional array in place using strides.\n\nGiven an n-dimensional array it will return a 2n-dimensional array with\nthe first n dimensions indexing patch position and the last n indexing\nthe patch content. This operation is immediate (O(1)). A reshape\nperformed on the first n dimensions will cause numpy to copy data, leading\nto a list of extracted patches.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\narr : ndarray\n n-dimensional array of which patches are to be extracted\n\npatch_shape : int or tuple of length arr.ndim.default=8\n Indicates the shape of the patches to be extracted. 
If an\n integer is given, the shape will be a hypercube of\n sidelength given by its value.\n\nextraction_step : int or tuple of length arr.ndim, default=1\n Indicates step size at which extraction shall be performed.\n If integer is given, then the step is uniform in all dimensions.\n\n\nReturns\n-------\npatches : strided ndarray\n 2n-dimensional array indexing patches on first n dimensions and\n containing patches on the last n dimensions. These dimensions\n are fake, but this way no data is copied. A simple reshape invokes\n a copying operation to obtain a list of patches:\n result.reshape([-1] + list(patch_shape))" - }, - { - "name": "extract_patches_2d", - "decorators": [], - "parameters": [ - { - "name": "image", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The original image data. For color images, the last dimension specifies the channel: a RGB image would have `n_channels=3`." - }, - { - "name": "patch_size", - "type": "Tuple[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dimensions of one patch." - }, - { - "name": "max_patches", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of patches to extract. If `max_patches` is a float between 0 and 1, it is taken to be a proportion of the total number of patches." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator used for random sampling when `max_patches` is not None. Use an int to make the randomness deterministic. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reshape a 2D image into a collection of patches\n\nThe resulting patches are allocated in a dedicated array.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nimage : ndarray of shape (image_height, image_width) or (image_height, image_width, n_channels)\n The original image data. For color images, the last dimension specifies\n the channel: a RGB image would have `n_channels=3`.\n\npatch_size : tuple of int (patch_height, patch_width)\n The dimensions of one patch.\n\nmax_patches : int or float, default=None\n The maximum number of patches to extract. If `max_patches` is a float\n between 0 and 1, it is taken to be a proportion of the total number\n of patches.\n\nrandom_state : int, RandomState instance, default=None\n Determines the random number generator used for random sampling when\n `max_patches` is not None. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\nReturns\n-------\npatches : array of shape (n_patches, patch_height, patch_width) or (n_patches, patch_height, patch_width, n_channels)\n The collection of patches extracted from the image, where `n_patches`\n is either `max_patches` or the total number of patches that can be\n extracted.\n\nExamples\n--------\n>>> from sklearn.datasets import load_sample_image\n>>> from sklearn.feature_extraction import image\n>>> # Use the array data from the first image in this dataset:\n>>> one_image = load_sample_image(\"china.jpg\")\n>>> print('Image shape: {}'.format(one_image.shape))\nImage shape: (427, 640, 3)\n>>> patches = image.extract_patches_2d(one_image, (2, 2))\n>>> print('Patches shape: {}'.format(patches.shape))\nPatches shape: (272214, 2, 2, 3)\n>>> # Here are just two of these patches:\n>>> print(patches[1])\n[[[174 201 231]\n [174 201 231]]\n [[173 200 230]\n [173 200 230]]]\n>>> print(patches[800])\n[[[187 214 243]\n [188 215 244]]\n [[187 214 243]\n [188 215 244]]]" - }, - { - 
"name": "reconstruct_from_patches_2d", - "decorators": [], - "parameters": [ - { - "name": "patches", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The complete set of patches. If the patches contain colour information, channels are indexed along the last dimension: RGB patches would have `n_channels=3`." - }, - { - "name": "image_size", - "type": "Tuple[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The size of the image that will be reconstructed." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reconstruct the image from all of its patches.\n\nPatches are assumed to overlap and the image is constructed by filling in\nthe patches from left to right, top to bottom, averaging the overlapping\nregions.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npatches : ndarray of shape (n_patches, patch_height, patch_width) or (n_patches, patch_height, patch_width, n_channels)\n The complete set of patches. If the patches contain colour information,\n channels are indexed along the last dimension: RGB patches would\n have `n_channels=3`.\n\nimage_size : tuple of int (image_height, image_width) or (image_height, image_width, n_channels)\n The size of the image that will be reconstructed.\n\nReturns\n-------\nimage : ndarray of shape image_size\n The reconstructed image." 
- } - ] - }, - { - "name": "sklearn.feature_extraction.setup", - "imports": [ - "import os", - "import platform", - "import numpy", - "from numpy.distutils.misc_util import Configuration" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_extraction.text", - "imports": [ - "import array", - "from collections import defaultdict", - "from collections.abc import Mapping", - "from functools import partial", - "import numbers", - "from operator import itemgetter", - "import re", - "import unicodedata", - "import warnings", - "import numpy as np", - "import scipy.sparse as sp", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from preprocessing import normalize", - "from _hash import FeatureHasher", - "from _stop_words import ENGLISH_STOP_WORDS", - "from utils.validation import check_is_fitted", - "from utils.validation import check_array", - "from utils.validation import FLOAT_DTYPES", - "from utils import _IS_32BIT", - "from utils.fixes import _astype_copy_false", - "from exceptions import NotFittedError", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "_VectorizerMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decode", - "decorators": [], - "parameters": [ - { - "name": "doc", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The string to decode." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Decode the input into a string of unicode symbols.\n\nThe decoding strategy depends on the vectorizer parameters.\n\nParameters\n----------\ndoc : str\n The string to decode.\n\nReturns\n-------\ndoc: str\n A string of unicode symbols." - }, - { - "name": "_word_ngrams", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Turn tokens into a sequence of n-grams after stop words filtering" - }, - { - "name": "_char_ngrams", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Tokenize text_document into a sequence of character n-grams" - }, - { - "name": "_char_wb_ngrams", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Whitespace sensitive char-n-gram tokenization.\n\nTokenize text_document into a sequence of character n-grams\noperating only inside word boundaries. n-grams at the edges\nof words are padded with space." - }, - { - "name": "build_preprocessor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Return a function to preprocess the text before tokenization.\n\nReturns\n-------\npreprocessor: callable\n A function to preprocess the text before tokenization." - }, - { - "name": "build_tokenizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Return a function that splits a string into a sequence of tokens.\n\nReturns\n-------\ntokenizer: callable\n A function to split a string into a sequence of tokens." - }, - { - "name": "get_stop_words", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Build or fetch the effective stop words list.\n\nReturns\n-------\nstop_words: list or None\n A list of stop words." 
- }, - { - "name": "_check_stop_words_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Check if stop words are consistent\n\nReturns\n-------\nis_consistent : True if stop words are consistent with the preprocessor\n and tokenizer, False if they are not, None if the check\n was previously performed, \"error\" if it could not be\n performed (e.g. because of the use of a custom\n preprocessor / tokenizer)" - }, - { - "name": "build_analyzer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Return a callable that handles preprocessing, tokenization\nand n-grams generation.\n\nReturns\n-------\nanalyzer: callable\n A function to handle preprocessing, tokenization\n and n-grams generation." - }, - { - "name": "_validate_vocabulary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_vocabulary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if vocabulary is empty or missing (not fitted)" - }, - { - "name": "_validate_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check validity of ngram_range parameter" - }, - { - "name": "_warn_for_unused_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Provides common code for text vectorizers (tokenization logic)." 
- }, - { - "name": "HashingVectorizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "input", - "type": "Union[Literal['filename', 'file', 'content'], str]", - "hasDefault": true, - "default": "'content'", - "limitation": null, - "ignored": false, - "docstring": "If 'filename', the sequence passed as an argument to fit is expected to be a list of filenames that need reading to fetch the raw content to analyze. If 'file', the sequence items must have a 'read' method (file-like object) that is called to fetch the bytes in memory. Otherwise the input is expected to be a sequence of items that can be of type string or byte." - }, - { - "name": "encoding", - "type": "str", - "hasDefault": true, - "default": "'utf-8'", - "limitation": null, - "ignored": false, - "docstring": "If bytes or files are given to analyze, this encoding is used to decode." - }, - { - "name": "decode_error", - "type": "Literal['strict', 'ignore', 'replace']", - "hasDefault": true, - "default": "'strict'", - "limitation": null, - "ignored": false, - "docstring": "Instruction on what to do if a byte sequence is given to analyze that contains characters not of the given `encoding`. By default, it is 'strict', meaning that a UnicodeDecodeError will be raised. Other values are 'ignore' and 'replace'." - }, - { - "name": "strip_accents", - "type": "Literal['ascii', 'unicode']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Remove accents and perform other character normalization during the preprocessing step. 'ascii' is a fast method that only works on characters that have an direct ASCII mapping. 'unicode' is a slightly slower method that works on any characters. None (default) does nothing. Both 'ascii' and 'unicode' use NFKD normalization from :func:`unicodedata.normalize`." 
- }, - { - "name": "lowercase", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Convert all characters to lowercase before tokenizing." - }, - { - "name": "preprocessor", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Override the preprocessing (string transformation) stage while preserving the tokenizing and n-grams generation steps. Only applies if ``analyzer is not callable``." - }, - { - "name": "tokenizer", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Override the string tokenization step while preserving the preprocessing and n-grams generation steps. Only applies if ``analyzer == 'word'``." - }, - { - "name": "stop_words", - "type": "Union[Literal['english'], str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If 'english', a built-in stop word list for English is used. There are several known issues with 'english' and you should consider an alternative (see :ref:`stop_words`). If a list, that list is assumed to contain stop words, all of which will be removed from the resulting tokens. Only applies if ``analyzer == 'word'``." - }, - { - "name": "token_pattern", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regular expression denoting what constitutes a \"token\", only used if ``analyzer == 'word'``. The default regexp selects tokens of 2 or more alphanumeric characters (punctuation is completely ignored and always treated as a token separator). If there is a capturing group in token_pattern then the captured group content, not the entire match, becomes the token. At most one capturing group is permitted." 
- }, - { - "name": "ngram_range", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper boundary of the range of n-values for different n-grams to be extracted. All values of n such that min_n <= n <= max_n will be used. For example an ``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means only bigrams. Only applies if ``analyzer is not callable``." - }, - { - "name": "analyzer", - "type": "Literal['word', 'char', 'char_wb']", - "hasDefault": true, - "default": "'word'", - "limitation": null, - "ignored": false, - "docstring": "Whether the feature should be made of word or character n-grams. Option 'char_wb' creates character n-grams only from text inside word boundaries; n-grams at the edges of words are padded with space. If a callable is passed it is used to extract the sequence of features out of the raw, unprocessed input. .. versionchanged:: 0.21 Since v0.21, if ``input`` is ``filename`` or ``file``, the data is first read from the file and then passed to the given callable analyzer." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features (columns) in the output matrices. Small numbers of features are likely to cause hash collisions, but large numbers will cause larger coefficient dimensions in linear learners." - }, - { - "name": "binary", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, all non zero counts are set to 1. This is useful for discrete probabilistic models that model binary events rather than integer counts." - }, - { - "name": "norm", - "type": "Literal['l1', 'l2']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "Norm used to normalize term vectors. 
None for no normalization." - }, - { - "name": "alternate_sign", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "When True, an alternating sign is added to the features as to approximately conserve the inner product in the hashed space even for small n_features. This approach is similar to sparse random projection. .. versionadded:: 0.19" - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Type of the matrix returned by fit_transform() or transform()." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Does nothing: this transformer is stateless.\n\nThis method is just there to mark the fact that this transformer\ncan work in a streaming setup.\n\nParameters\n----------\nX : ndarray of shape [n_samples, n_features]\n Training data." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Does nothing: this transformer is stateless.\n\nParameters\n----------\nX : ndarray of shape [n_samples, n_features]\n Training data." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples. 
Each sample must be a text document (either bytes or unicode strings, file name or file object depending on the constructor argument) which will be tokenized and hashed." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform a sequence of documents to a document-term matrix.\n\nParameters\n----------\nX : iterable over raw text documents, length = n_samples\n Samples. Each sample must be a text document (either bytes or\n unicode strings, file name or file object depending on the\n constructor argument) which will be tokenized and hashed.\n\nReturns\n-------\nX : sparse matrix of shape (n_samples, n_features)\n Document-term matrix." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples. Each sample must be a text document (either bytes or unicode strings, file name or file object depending on the constructor argument) which will be tokenized and hashed." - }, - { - "name": "y", - "type": "Any", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored. This parameter exists only for compatibility with sklearn.pipeline.Pipeline." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform a sequence of documents to a document-term matrix.\n\nParameters\n----------\nX : iterable over raw text documents, length = n_samples\n Samples. Each sample must be a text document (either bytes or\n unicode strings, file name or file object depending on the\n constructor argument) which will be tokenized and hashed.\ny : any\n Ignored. This parameter exists only for compatibility with\n sklearn.pipeline.Pipeline.\n\nReturns\n-------\nX : sparse matrix of shape (n_samples, n_features)\n Document-term matrix." 
- }, - { - "name": "_get_hasher", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Convert a collection of text documents to a matrix of token occurrences\n\nIt turns a collection of text documents into a scipy.sparse matrix holding\ntoken occurrence counts (or binary occurrence information), possibly\nnormalized as token frequencies if norm='l1' or projected on the euclidean\nunit sphere if norm='l2'.\n\nThis text vectorizer implementation uses the hashing trick to find the\ntoken string name to feature integer index mapping.\n\nThis strategy has several advantages:\n\n- it is very low memory scalable to large datasets as there is no need to\n store a vocabulary dictionary in memory\n\n- it is fast to pickle and un-pickle as it holds no state besides the\n constructor parameters\n\n- it can be used in a streaming (partial fit) or parallel pipeline as there\n is no state computed during fit.\n\nThere are also a couple of cons (vs using a CountVectorizer with an\nin-memory vocabulary):\n\n- there is no way to compute the inverse transform (from feature indices to\n string feature names) which can be a problem when trying to introspect\n which features are most important to a model.\n\n- there can be collisions: distinct tokens can be mapped to the same\n feature index. However in practice this is rarely an issue if n_features\n is large enough (e.g. 
2 ** 18 for text classification problems).\n\n- no IDF weighting as this would render the transformer stateful.\n\nThe hash function employed is the signed 32-bit version of Murmurhash3.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\ninput : string {'filename', 'file', 'content'}, default='content'\n If 'filename', the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n If 'file', the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n Otherwise the input is expected to be a sequence of items that\n can be of type string or byte.\n\nencoding : string, default='utf-8'\n If bytes or files are given to analyze, this encoding is used to\n decode.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. By default, it is\n 'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n values are 'ignore' and 'replace'.\n\nstrip_accents : {'ascii', 'unicode'}, default=None\n Remove accents and perform other character normalization\n during the preprocessing step.\n 'ascii' is a fast method that only works on characters that have\n an direct ASCII mapping.\n 'unicode' is a slightly slower method that works on any characters.\n None (default) does nothing.\n\n Both 'ascii' and 'unicode' use NFKD normalization from\n :func:`unicodedata.normalize`.\n\nlowercase : bool, default=True\n Convert all characters to lowercase before tokenizing.\n\npreprocessor : callable, default=None\n Override the preprocessing (string transformation) stage while\n preserving the tokenizing and n-grams generation steps.\n Only applies if ``analyzer is not callable``.\n\ntokenizer : callable, default=None\n Override the string tokenization step while preserving the\n preprocessing and n-grams generation steps.\n Only applies if ``analyzer == 'word'``.\n\nstop_words : string {'english'}, list, default=None\n If 'english', a built-in stop word list for English is used.\n There are several known issues with 'english' and you should\n consider an alternative (see :ref:`stop_words`).\n\n If a list, that list is assumed to contain stop words, all of which\n will be removed from the resulting tokens.\n Only applies if ``analyzer == 'word'``.\n\ntoken_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n Regular expression denoting what constitutes a \"token\", only used\n if ``analyzer == 'word'``. The default regexp selects tokens of 2\n or more alphanumeric characters (punctuation is completely ignored\n and always treated as a token separator).\n\n If there is a capturing group in token_pattern then the\n captured group content, not the entire match, becomes the token.\n At most one capturing group is permitted.\n\nngram_range : tuple (min_n, max_n), default=(1, 1)\n The lower and upper boundary of the range of n-values for different\n n-grams to be extracted. 
All values of n such that min_n <= n <= max_n\n will be used. For example an ``ngram_range`` of ``(1, 1)`` means only\n unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\n only bigrams.\n Only applies if ``analyzer is not callable``.\n\nanalyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n Whether the feature should be made of word or character n-grams.\n Option 'char_wb' creates character n-grams only from text inside\n word boundaries; n-grams at the edges of words are padded with space.\n\n If a callable is passed it is used to extract the sequence of features\n out of the raw, unprocessed input.\n\n .. versionchanged:: 0.21\n\n Since v0.21, if ``input`` is ``filename`` or ``file``, the data is\n first read from the file and then passed to the given callable\n analyzer.\n\nn_features : int, default=(2 ** 20)\n The number of features (columns) in the output matrices. Small numbers\n of features are likely to cause hash collisions, but large numbers\n will cause larger coefficient dimensions in linear learners.\n\nbinary : bool, default=False.\n If True, all non zero counts are set to 1. This is useful for discrete\n probabilistic models that model binary events rather than integer\n counts.\n\nnorm : {'l1', 'l2'}, default='l2'\n Norm used to normalize term vectors. None for no normalization.\n\nalternate_sign : bool, default=True\n When True, an alternating sign is added to the features as to\n approximately conserve the inner product in the hashed space even for\n small n_features. This approach is similar to sparse random projection.\n\n .. versionadded:: 0.19\n\ndtype : type, default=np.float64\n Type of the matrix returned by fit_transform() or transform().\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import HashingVectorizer\n>>> corpus = [\n... 'This is the first document.',\n... 'This document is the second document.',\n... 'And this is the third one.',\n... 'Is this the first document?',\n... 
]\n>>> vectorizer = HashingVectorizer(n_features=2**4)\n>>> X = vectorizer.fit_transform(corpus)\n>>> print(X.shape)\n(4, 16)\n\nSee Also\n--------\nCountVectorizer, TfidfVectorizer" - }, - { - "name": "CountVectorizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "input", - "type": "Union[Literal['filename', 'file', 'content'], str]", - "hasDefault": true, - "default": "'content'", - "limitation": null, - "ignored": false, - "docstring": "If 'filename', the sequence passed as an argument to fit is expected to be a list of filenames that need reading to fetch the raw content to analyze. If 'file', the sequence items must have a 'read' method (file-like object) that is called to fetch the bytes in memory. Otherwise the input is expected to be a sequence of items that can be of type string or byte." - }, - { - "name": "encoding", - "type": "str", - "hasDefault": true, - "default": "'utf-8'", - "limitation": null, - "ignored": false, - "docstring": "If bytes or files are given to analyze, this encoding is used to decode." - }, - { - "name": "decode_error", - "type": "Literal['strict', 'ignore', 'replace']", - "hasDefault": true, - "default": "'strict'", - "limitation": null, - "ignored": false, - "docstring": "Instruction on what to do if a byte sequence is given to analyze that contains characters not of the given `encoding`. By default, it is 'strict', meaning that a UnicodeDecodeError will be raised. Other values are 'ignore' and 'replace'." - }, - { - "name": "strip_accents", - "type": "Literal['ascii', 'unicode']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Remove accents and perform other character normalization during the preprocessing step. 'ascii' is a fast method that only works on characters that have an direct ASCII mapping. 'unicode' is a slightly slower method that works on any characters. 
None (default) does nothing. Both 'ascii' and 'unicode' use NFKD normalization from :func:`unicodedata.normalize`." - }, - { - "name": "lowercase", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Convert all characters to lowercase before tokenizing." - }, - { - "name": "preprocessor", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Override the preprocessing (strip_accents and lowercase) stage while preserving the tokenizing and n-grams generation steps. Only applies if ``analyzer is not callable``." - }, - { - "name": "tokenizer", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Override the string tokenization step while preserving the preprocessing and n-grams generation steps. Only applies if ``analyzer == 'word'``." - }, - { - "name": "stop_words", - "type": "Union[Literal['english'], str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If 'english', a built-in stop word list for English is used. There are several known issues with 'english' and you should consider an alternative (see :ref:`stop_words`). If a list, that list is assumed to contain stop words, all of which will be removed from the resulting tokens. Only applies if ``analyzer == 'word'``. If None, no stop words will be used. max_df can be set to a value in the range [0.7, 1.0) to automatically detect and filter stop words based on intra corpus document frequency of terms." - }, - { - "name": "token_pattern", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regular expression denoting what constitutes a \"token\", only used if ``analyzer == 'word'``. 
The default regexp select tokens of 2 or more alphanumeric characters (punctuation is completely ignored and always treated as a token separator). If there is a capturing group in token_pattern then the captured group content, not the entire match, becomes the token. At most one capturing group is permitted." - }, - { - "name": "ngram_range", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper boundary of the range of n-values for different word n-grams or char n-grams to be extracted. All values of n such such that min_n <= n <= max_n will be used. For example an ``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means only bigrams. Only applies if ``analyzer is not callable``." - }, - { - "name": "analyzer", - "type": "Literal['word', 'char', 'char_wb']", - "hasDefault": true, - "default": "'word'", - "limitation": null, - "ignored": false, - "docstring": "Whether the feature should be made of word n-gram or character n-grams. Option 'char_wb' creates character n-grams only from text inside word boundaries; n-grams at the edges of words are padded with space. If a callable is passed it is used to extract the sequence of features out of the raw, unprocessed input. .. versionchanged:: 0.21 Since v0.21, if ``input`` is ``filename`` or ``file``, the data is first read from the file and then passed to the given callable analyzer." - }, - { - "name": "max_df", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "When building the vocabulary ignore terms that have a document frequency strictly higher than the given threshold (corpus-specific stop words). If float, the parameter represents a proportion of documents, integer absolute counts. This parameter is ignored if vocabulary is not None." 
- }, - { - "name": "min_df", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "When building the vocabulary ignore terms that have a document frequency strictly lower than the given threshold. This value is also called cut-off in the literature. If float, the parameter represents a proportion of documents, integer absolute counts. This parameter is ignored if vocabulary is not None." - }, - { - "name": "max_features", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, build a vocabulary that only consider the top max_features ordered by term frequency across the corpus. This parameter is ignored if vocabulary is not None." - }, - { - "name": "vocabulary", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Either a Mapping (e.g., a dict) where keys are terms and values are indices in the feature matrix, or an iterable over terms. If not given, a vocabulary is determined from the input documents. Indices in the mapping should not be repeated and should not have any gap between 0 and the largest index." - }, - { - "name": "binary", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, all non zero counts are set to 1. This is useful for discrete probabilistic models that model binary events rather than integer counts." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Type of the matrix returned by fit_transform() or transform()." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sort_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sort features by name\n\nReturns a reordered matrix and modifies the vocabulary in place" - }, - { - "name": "_limit_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Remove too rare or too common features.\n\nPrune features that are non zero in more samples than high or less\ndocuments than low, modifying the vocabulary, and restricting it to\nat most the limit most frequent.\n\nThis does not prune samples with zero features." - }, - { - "name": "_count_vocab", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Create sparse feature matrix, and vocabulary where fixed_vocab=False\n " - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "raw_documents", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An iterable which yields either str, unicode or file objects." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn a vocabulary dictionary of all tokens in the raw documents.\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\nReturns\n-------\nself" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "raw_documents", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An iterable which yields either str, unicode or file objects." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Learn the vocabulary dictionary and return document-term matrix.\n\nThis is equivalent to fit followed by transform, but more efficiently\nimplemented.\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\nReturns\n-------\nX : array of shape (n_samples, n_features)\n Document-term matrix." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "raw_documents", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An iterable which yields either str, unicode or file objects." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform documents to document-term matrix.\n\nExtract token counts out of raw text documents using the vocabulary\nfitted with fit or the one provided to the constructor.\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\nReturns\n-------\nX : sparse matrix of shape (n_samples, n_features)\n Document-term matrix." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Document-term matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return terms per document with nonzero entries in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document-term matrix.\n\nReturns\n-------\nX_inv : list of arrays of shape (n_samples,)\n List of arrays of terms." 
- }, - { - "name": "get_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Array mapping from feature integer indices to feature name.\n\nReturns\n-------\nfeature_names : list\n A list of feature names." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Convert a collection of text documents to a matrix of token counts\n\nThis implementation produces a sparse representation of the counts using\nscipy.sparse.csr_matrix.\n\nIf you do not provide an a-priori dictionary and you do not use an analyzer\nthat does some kind of feature selection then the number of features will\nbe equal to the vocabulary size found by analyzing the data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ninput : string {'filename', 'file', 'content'}, default='content'\n If 'filename', the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n If 'file', the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n Otherwise the input is expected to be a sequence of items that\n can be of type string or byte.\n\nencoding : string, default='utf-8'\n If bytes or files are given to analyze, this encoding is used to\n decode.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. By default, it is\n 'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n values are 'ignore' and 'replace'.\n\nstrip_accents : {'ascii', 'unicode'}, default=None\n Remove accents and perform other character normalization\n during the preprocessing step.\n 'ascii' is a fast method that only works on characters that have\n an direct ASCII mapping.\n 'unicode' is a slightly slower method that works on any characters.\n None (default) does nothing.\n\n Both 'ascii' and 'unicode' use NFKD normalization from\n :func:`unicodedata.normalize`.\n\nlowercase : bool, default=True\n Convert all characters to lowercase before tokenizing.\n\npreprocessor : callable, default=None\n Override the preprocessing (strip_accents and lowercase) stage while\n preserving the tokenizing and n-grams generation steps.\n Only applies if ``analyzer is not callable``.\n\ntokenizer : callable, default=None\n Override the string tokenization step while preserving the\n preprocessing and n-grams generation steps.\n Only applies if ``analyzer == 'word'``.\n\nstop_words : string {'english'}, list, default=None\n If 'english', a built-in stop word list for English is used.\n There are several known issues with 'english' and you should\n consider an alternative (see :ref:`stop_words`).\n\n If a list, that list is assumed to contain stop words, all of which\n will be removed from the resulting tokens.\n Only applies if ``analyzer == 'word'``.\n\n If None, no stop words will be used. max_df can be set to a value\n in the range [0.7, 1.0) to automatically detect and filter stop\n words based on intra corpus document frequency of terms.\n\ntoken_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n Regular expression denoting what constitutes a \"token\", only used\n if ``analyzer == 'word'``. 
The default regexp select tokens of 2\n or more alphanumeric characters (punctuation is completely ignored\n and always treated as a token separator).\n\n If there is a capturing group in token_pattern then the\n captured group content, not the entire match, becomes the token.\n At most one capturing group is permitted.\n\nngram_range : tuple (min_n, max_n), default=(1, 1)\n The lower and upper boundary of the range of n-values for different\n word n-grams or char n-grams to be extracted. All values of n such\n such that min_n <= n <= max_n will be used. For example an\n ``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means\n unigrams and bigrams, and ``(2, 2)`` means only bigrams.\n Only applies if ``analyzer is not callable``.\n\nanalyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n Whether the feature should be made of word n-gram or character\n n-grams.\n Option 'char_wb' creates character n-grams only from text inside\n word boundaries; n-grams at the edges of words are padded with space.\n\n If a callable is passed it is used to extract the sequence of features\n out of the raw, unprocessed input.\n\n .. versionchanged:: 0.21\n\n Since v0.21, if ``input`` is ``filename`` or ``file``, the data is\n first read from the file and then passed to the given callable\n analyzer.\n\nmax_df : float in range [0.0, 1.0] or int, default=1.0\n When building the vocabulary ignore terms that have a document\n frequency strictly higher than the given threshold (corpus-specific\n stop words).\n If float, the parameter represents a proportion of documents, integer\n absolute counts.\n This parameter is ignored if vocabulary is not None.\n\nmin_df : float in range [0.0, 1.0] or int, default=1\n When building the vocabulary ignore terms that have a document\n frequency strictly lower than the given threshold. 
This value is also\n called cut-off in the literature.\n If float, the parameter represents a proportion of documents, integer\n absolute counts.\n This parameter is ignored if vocabulary is not None.\n\nmax_features : int, default=None\n If not None, build a vocabulary that only consider the top\n max_features ordered by term frequency across the corpus.\n\n This parameter is ignored if vocabulary is not None.\n\nvocabulary : Mapping or iterable, default=None\n Either a Mapping (e.g., a dict) where keys are terms and values are\n indices in the feature matrix, or an iterable over terms. If not\n given, a vocabulary is determined from the input documents. Indices\n in the mapping should not be repeated and should not have any gap\n between 0 and the largest index.\n\nbinary : bool, default=False\n If True, all non zero counts are set to 1. This is useful for discrete\n probabilistic models that model binary events rather than integer\n counts.\n\ndtype : type, default=np.int64\n Type of the matrix returned by fit_transform() or transform().\n\nAttributes\n----------\nvocabulary_ : dict\n A mapping of terms to feature indices.\n\nfixed_vocabulary_: boolean\n True if a fixed vocabulary of term to indices mapping\n is provided by the user\n\nstop_words_ : set\n Terms that were ignored because they either:\n\n - occurred in too many documents (`max_df`)\n - occurred in too few documents (`min_df`)\n - were cut off by feature selection (`max_features`).\n\n This is only available if no vocabulary was given.\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import CountVectorizer\n>>> corpus = [\n... 'This is the first document.',\n... 'This document is the second document.',\n... 'And this is the third one.',\n... 'Is this the first document?',\n... 
]\n>>> vectorizer = CountVectorizer()\n>>> X = vectorizer.fit_transform(corpus)\n>>> print(vectorizer.get_feature_names())\n['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']\n>>> print(X.toarray())\n[[0 1 1 1 0 0 1 0 1]\n [0 2 0 1 0 1 1 0 1]\n [1 0 0 1 1 0 1 1 1]\n [0 1 1 1 0 0 1 0 1]]\n>>> vectorizer2 = CountVectorizer(analyzer='word', ngram_range=(2, 2))\n>>> X2 = vectorizer2.fit_transform(corpus)\n>>> print(vectorizer2.get_feature_names())\n['and this', 'document is', 'first document', 'is the', 'is this',\n'second document', 'the first', 'the second', 'the third', 'third one',\n 'this document', 'this is', 'this the']\n >>> print(X2.toarray())\n [[0 0 1 1 0 0 1 0 0 0 0 1 0]\n [0 1 0 1 0 1 0 1 0 0 1 0 0]\n [1 0 0 1 0 0 0 0 1 1 0 1 0]\n [0 0 1 0 1 0 1 0 0 0 0 0 1]]\n\nSee Also\n--------\nHashingVectorizer, TfidfVectorizer\n\nNotes\n-----\nThe ``stop_words_`` attribute can get large and increase the model size\nwhen pickling. This attribute is provided only for introspection and can\nbe safely removed using delattr or set to None before pickling." - }, - { - "name": "TfidfTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "norm", - "type": "Literal['l1', 'l2']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "Each output row will have unit norm, either: * 'l2': Sum of squares of vector elements is 1. The cosine similarity between two vectors is their dot product when l2 norm has been applied. * 'l1': Sum of absolute values of vector elements is 1. See :func:`preprocessing.normalize`" - }, - { - "name": "use_idf", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Enable inverse-document-frequency reweighting." 
- }, - { - "name": "smooth_idf", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Smooth idf weights by adding one to document frequencies, as if an extra document was seen containing every term in the collection exactly once. Prevents zero divisions." - }, - { - "name": "sublinear_tf", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A matrix of term/token counts." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Learn the idf vector (global term weights).\n\nParameters\n----------\nX : sparse matrix of shape n_samples, n_features)\n A matrix of term/token counts." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "a matrix of term/token counts" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to copy X and operate on the copy or perform in-place operations." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform a count matrix to a tf or tf-idf representation\n\nParameters\n----------\nX : sparse matrix of (n_samples, n_features)\n a matrix of term/token counts\n\ncopy : bool, default=True\n Whether to copy X and operate on the copy or perform in-place\n operations.\n\nReturns\n-------\nvectors : sparse matrix of shape (n_samples, n_features)" - }, - { - "name": "idf_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transform a count matrix to a normalized tf or tf-idf representation\n\nTf means term-frequency while tf-idf means term-frequency times inverse\ndocument-frequency. This is a common term weighting scheme in information\nretrieval, that has also found good use in document classification.\n\nThe goal of using tf-idf instead of the raw frequencies of occurrence of a\ntoken in a given document is to scale down the impact of tokens that occur\nvery frequently in a given corpus and that are hence empirically less\ninformative than features that occur in a small fraction of the training\ncorpus.\n\nThe formula that is used to compute the tf-idf for a term t of a document d\nin a document set is tf-idf(t, d) = tf(t, d) * idf(t), and the idf is\ncomputed as idf(t) = log [ n / df(t) ] + 1 (if ``smooth_idf=False``), where\nn is the total number of documents in the document set and df(t) is the\ndocument frequency of t; the document frequency is the number of documents\nin the document set that contain the term t. 
The effect of adding \"1\" to\nthe idf in the equation above is that terms with zero idf, i.e., terms\nthat occur in all documents in a training set, will not be entirely\nignored.\n(Note that the idf formula above differs from the standard textbook\nnotation that defines the idf as\nidf(t) = log [ n / (df(t) + 1) ]).\n\nIf ``smooth_idf=True`` (the default), the constant \"1\" is added to the\nnumerator and denominator of the idf as if an extra document was seen\ncontaining every term in the collection exactly once, which prevents\nzero divisions: idf(t) = log [ (1 + n) / (1 + df(t)) ] + 1.\n\nFurthermore, the formulas used to compute tf and idf depend\non parameter settings that correspond to the SMART notation used in IR\nas follows:\n\nTf is \"n\" (natural) by default, \"l\" (logarithmic) when\n``sublinear_tf=True``.\nIdf is \"t\" when use_idf is given, \"n\" (none) otherwise.\nNormalization is \"c\" (cosine) when ``norm='l2'``, \"n\" (none)\nwhen ``norm=None``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nnorm : {'l1', 'l2'}, default='l2'\n Each output row will have unit norm, either:\n * 'l2': Sum of squares of vector elements is 1. The cosine\n similarity between two vectors is their dot product when l2 norm has\n been applied.\n * 'l1': Sum of absolute values of vector elements is 1.\n See :func:`preprocessing.normalize`\n\nuse_idf : bool, default=True\n Enable inverse-document-frequency reweighting.\n\nsmooth_idf : bool, default=True\n Smooth idf weights by adding one to document frequencies, as if an\n extra document was seen containing every term in the collection\n exactly once. Prevents zero divisions.\n\nsublinear_tf : bool, default=False\n Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf).\n\nAttributes\n----------\nidf_ : array of shape (n_features)\n The inverse document frequency (IDF) vector; only defined\n if ``use_idf`` is True.\n\n .. 
versionadded:: 0.20\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import TfidfTransformer\n>>> from sklearn.feature_extraction.text import CountVectorizer\n>>> from sklearn.pipeline import Pipeline\n>>> import numpy as np\n>>> corpus = ['this is the first document',\n... 'this document is the second document',\n... 'and this is the third one',\n... 'is this the first document']\n>>> vocabulary = ['this', 'document', 'first', 'is', 'second', 'the',\n... 'and', 'one']\n>>> pipe = Pipeline([('count', CountVectorizer(vocabulary=vocabulary)),\n... ('tfid', TfidfTransformer())]).fit(corpus)\n>>> pipe['count'].transform(corpus).toarray()\narray([[1, 1, 1, 1, 0, 1, 0, 0],\n [1, 2, 0, 1, 1, 1, 0, 0],\n [1, 0, 0, 1, 0, 1, 1, 1],\n [1, 1, 1, 1, 0, 1, 0, 0]])\n>>> pipe['tfid'].idf_\narray([1. , 1.22314355, 1.51082562, 1. , 1.91629073,\n 1. , 1.91629073, 1.91629073])\n>>> pipe.transform(corpus).shape\n(4, 8)\n\nReferences\n----------\n\n.. [Yates2011] R. Baeza-Yates and B. Ribeiro-Neto (2011). Modern\n Information Retrieval. Addison Wesley, pp. 68-74.\n\n.. [MRS2008] C.D. Manning, P. Raghavan and H. Sch\u00fctze (2008).\n Introduction to Information Retrieval. Cambridge University\n Press, pp. 118-120." - }, - { - "name": "TfidfVectorizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "input", - "type": "Literal['filename', 'file', 'content']", - "hasDefault": true, - "default": "'content'", - "limitation": null, - "ignored": false, - "docstring": "If 'filename', the sequence passed as an argument to fit is expected to be a list of filenames that need reading to fetch the raw content to analyze. If 'file', the sequence items must have a 'read' method (file-like object) that is called to fetch the bytes in memory. Otherwise the input is expected to be a sequence of items that can be of type string or byte." 
- }, - { - "name": "encoding", - "type": "str", - "hasDefault": true, - "default": "'utf-8'", - "limitation": null, - "ignored": false, - "docstring": "If bytes or files are given to analyze, this encoding is used to decode." - }, - { - "name": "decode_error", - "type": "Literal['strict', 'ignore', 'replace']", - "hasDefault": true, - "default": "'strict'", - "limitation": null, - "ignored": false, - "docstring": "Instruction on what to do if a byte sequence is given to analyze that contains characters not of the given `encoding`. By default, it is 'strict', meaning that a UnicodeDecodeError will be raised. Other values are 'ignore' and 'replace'." - }, - { - "name": "strip_accents", - "type": "Literal['ascii', 'unicode']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Remove accents and perform other character normalization during the preprocessing step. 'ascii' is a fast method that only works on characters that have an direct ASCII mapping. 'unicode' is a slightly slower method that works on any characters. None (default) does nothing. Both 'ascii' and 'unicode' use NFKD normalization from :func:`unicodedata.normalize`." - }, - { - "name": "lowercase", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Convert all characters to lowercase before tokenizing." - }, - { - "name": "preprocessor", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Override the preprocessing (string transformation) stage while preserving the tokenizing and n-grams generation steps. Only applies if ``analyzer is not callable``." - }, - { - "name": "tokenizer", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Override the string tokenization step while preserving the preprocessing and n-grams generation steps. 
Only applies if ``analyzer == 'word'``." - }, - { - "name": "analyzer", - "type": "Literal['word', 'char', 'char_wb']", - "hasDefault": true, - "default": "'word'", - "limitation": null, - "ignored": false, - "docstring": "Whether the feature should be made of word or character n-grams. Option 'char_wb' creates character n-grams only from text inside word boundaries; n-grams at the edges of words are padded with space. If a callable is passed it is used to extract the sequence of features out of the raw, unprocessed input. .. versionchanged:: 0.21 Since v0.21, if ``input`` is ``filename`` or ``file``, the data is first read from the file and then passed to the given callable analyzer." - }, - { - "name": "stop_words", - "type": "Literal['english']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If a string, it is passed to _check_stop_list and the appropriate stop list is returned. 'english' is currently the only supported string value. There are several known issues with 'english' and you should consider an alternative (see :ref:`stop_words`). If a list, that list is assumed to contain stop words, all of which will be removed from the resulting tokens. Only applies if ``analyzer == 'word'``. If None, no stop words will be used. max_df can be set to a value in the range [0.7, 1.0) to automatically detect and filter stop words based on intra corpus document frequency of terms." - }, - { - "name": "token_pattern", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regular expression denoting what constitutes a \"token\", only used if ``analyzer == 'word'``. The default regexp selects tokens of 2 or more alphanumeric characters (punctuation is completely ignored and always treated as a token separator). If there is a capturing group in token_pattern then the captured group content, not the entire match, becomes the token. 
At most one capturing group is permitted." - }, - { - "name": "ngram_range", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper boundary of the range of n-values for different n-grams to be extracted. All values of n such that min_n <= n <= max_n will be used. For example an ``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means only bigrams. Only applies if ``analyzer is not callable``." - }, - { - "name": "max_df", - "type": "Union[int, float]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "When building the vocabulary ignore terms that have a document frequency strictly higher than the given threshold (corpus-specific stop words). If float in range [0.0, 1.0], the parameter represents a proportion of documents, integer absolute counts. This parameter is ignored if vocabulary is not None." - }, - { - "name": "min_df", - "type": "Union[int, float]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "When building the vocabulary ignore terms that have a document frequency strictly lower than the given threshold. This value is also called cut-off in the literature. If float in range of [0.0, 1.0], the parameter represents a proportion of documents, integer absolute counts. This parameter is ignored if vocabulary is not None." - }, - { - "name": "max_features", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, build a vocabulary that only consider the top max_features ordered by term frequency across the corpus. This parameter is ignored if vocabulary is not None." 
- }, - { - "name": "vocabulary", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Either a Mapping (e.g., a dict) where keys are terms and values are indices in the feature matrix, or an iterable over terms. If not given, a vocabulary is determined from the input documents." - }, - { - "name": "binary", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, all non-zero term counts are set to 1. This does not mean outputs will have only 0/1 values, only that the tf term in tf-idf is binary. (Set idf and normalization to False to get 0/1 outputs)." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "float64", - "limitation": null, - "ignored": false, - "docstring": "Type of the matrix returned by fit_transform() or transform()." - }, - { - "name": "norm", - "type": "Literal['l1', 'l2']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "Each output row will have unit norm, either: * 'l2': Sum of squares of vector elements is 1. The cosine similarity between two vectors is their dot product when l2 norm has been applied. * 'l1': Sum of absolute values of vector elements is 1. See :func:`preprocessing.normalize`." - }, - { - "name": "use_idf", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Enable inverse-document-frequency reweighting." - }, - { - "name": "smooth_idf", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Smooth idf weights by adding one to document frequencies, as if an extra document was seen containing every term in the collection exactly once. Prevents zero divisions." 
- }, - { - "name": "sublinear_tf", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "norm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "use_idf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "smooth_idf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "sublinear_tf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "idf_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "raw_documents", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An iterable which yields either str, unicode or file objects." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This parameter is not needed to compute tfidf." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn vocabulary and idf from training set.\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\ny : None\n This parameter is not needed to compute tfidf.\n\nReturns\n-------\nself : object\n Fitted vectorizer." 
- }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "raw_documents", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An iterable which yields either str, unicode or file objects." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn vocabulary and idf, return document-term matrix.\n\nThis is equivalent to fit followed by transform, but more efficiently\nimplemented.\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\ny : None\n This parameter is ignored.\n\nReturns\n-------\nX : sparse matrix of (n_samples, n_features)\n Tf-idf-weighted document-term matrix." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "raw_documents", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An iterable which yields either str, unicode or file objects." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform documents to document-term matrix.\n\nUses the vocabulary and document frequencies (df) learned by fit (or\nfit_transform).\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\nReturns\n-------\nX : sparse matrix of (n_samples, n_features)\n Tf-idf-weighted document-term matrix." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Convert a collection of raw documents to a matrix of TF-IDF features.\n\nEquivalent to :class:`CountVectorizer` followed by\n:class:`TfidfTransformer`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ninput : {'filename', 'file', 'content'}, default='content'\n If 'filename', the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n If 'file', the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n Otherwise the input is expected to be a sequence of items that\n can be of type string or byte.\n\nencoding : str, default='utf-8'\n If bytes or files are given to analyze, this encoding is used to\n decode.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. By default, it is\n 'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n values are 'ignore' and 'replace'.\n\nstrip_accents : {'ascii', 'unicode'}, default=None\n Remove accents and perform other character normalization\n during the preprocessing step.\n 'ascii' is a fast method that only works on characters that have\n an direct ASCII mapping.\n 'unicode' is a slightly slower method that works on any characters.\n None (default) does nothing.\n\n Both 'ascii' and 'unicode' use NFKD normalization from\n :func:`unicodedata.normalize`.\n\nlowercase : bool, default=True\n Convert all characters to lowercase before tokenizing.\n\npreprocessor : callable, default=None\n Override the preprocessing (string transformation) stage while\n preserving the tokenizing and n-grams generation steps.\n Only applies if ``analyzer is not callable``.\n\ntokenizer : callable, default=None\n Override the string tokenization step while preserving the\n preprocessing and n-grams generation steps.\n Only applies if ``analyzer == 'word'``.\n\nanalyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n Whether the feature should be made of word or character n-grams.\n Option 'char_wb' creates character n-grams only from text inside\n word boundaries; n-grams at the edges of words are padded with space.\n\n If a callable is passed it is used to extract the sequence of features\n out of the raw, unprocessed input.\n\n .. versionchanged:: 0.21\n\n Since v0.21, if ``input`` is ``filename`` or ``file``, the data is\n first read from the file and then passed to the given callable\n analyzer.\n\nstop_words : {'english'}, list, default=None\n If a string, it is passed to _check_stop_list and the appropriate stop\n list is returned. 
'english' is currently the only supported string\n value.\n There are several known issues with 'english' and you should\n consider an alternative (see :ref:`stop_words`).\n\n If a list, that list is assumed to contain stop words, all of which\n will be removed from the resulting tokens.\n Only applies if ``analyzer == 'word'``.\n\n If None, no stop words will be used. max_df can be set to a value\n in the range [0.7, 1.0) to automatically detect and filter stop\n words based on intra corpus document frequency of terms.\n\ntoken_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n Regular expression denoting what constitutes a \"token\", only used\n if ``analyzer == 'word'``. The default regexp selects tokens of 2\n or more alphanumeric characters (punctuation is completely ignored\n and always treated as a token separator).\n\n If there is a capturing group in token_pattern then the\n captured group content, not the entire match, becomes the token.\n At most one capturing group is permitted.\n\nngram_range : tuple (min_n, max_n), default=(1, 1)\n The lower and upper boundary of the range of n-values for different\n n-grams to be extracted. All values of n such that min_n <= n <= max_n\n will be used. For example an ``ngram_range`` of ``(1, 1)`` means only\n unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\n only bigrams.\n Only applies if ``analyzer is not callable``.\n\nmax_df : float or int, default=1.0\n When building the vocabulary ignore terms that have a document\n frequency strictly higher than the given threshold (corpus-specific\n stop words).\n If float in range [0.0, 1.0], the parameter represents a proportion of\n documents, integer absolute counts.\n This parameter is ignored if vocabulary is not None.\n\nmin_df : float or int, default=1\n When building the vocabulary ignore terms that have a document\n frequency strictly lower than the given threshold. 
This value is also\n called cut-off in the literature.\n If float in range of [0.0, 1.0], the parameter represents a proportion\n of documents, integer absolute counts.\n This parameter is ignored if vocabulary is not None.\n\nmax_features : int, default=None\n If not None, build a vocabulary that only consider the top\n max_features ordered by term frequency across the corpus.\n\n This parameter is ignored if vocabulary is not None.\n\nvocabulary : Mapping or iterable, default=None\n Either a Mapping (e.g., a dict) where keys are terms and values are\n indices in the feature matrix, or an iterable over terms. If not\n given, a vocabulary is determined from the input documents.\n\nbinary : bool, default=False\n If True, all non-zero term counts are set to 1. This does not mean\n outputs will have only 0/1 values, only that the tf term in tf-idf\n is binary. (Set idf and normalization to False to get 0/1 outputs).\n\ndtype : dtype, default=float64\n Type of the matrix returned by fit_transform() or transform().\n\nnorm : {'l1', 'l2'}, default='l2'\n Each output row will have unit norm, either:\n * 'l2': Sum of squares of vector elements is 1. The cosine\n similarity between two vectors is their dot product when l2 norm has\n been applied.\n * 'l1': Sum of absolute values of vector elements is 1.\n See :func:`preprocessing.normalize`.\n\nuse_idf : bool, default=True\n Enable inverse-document-frequency reweighting.\n\nsmooth_idf : bool, default=True\n Smooth idf weights by adding one to document frequencies, as if an\n extra document was seen containing every term in the collection\n exactly once. Prevents zero divisions.\n\nsublinear_tf : bool, default=False\n Apply sublinear tf scaling, i.e. 
replace tf with 1 + log(tf).\n\nAttributes\n----------\nvocabulary_ : dict\n A mapping of terms to feature indices.\n\nfixed_vocabulary_: bool\n True if a fixed vocabulary of term to indices mapping\n is provided by the user\n\nidf_ : array of shape (n_features,)\n The inverse document frequency (IDF) vector; only defined\n if ``use_idf`` is True.\n\nstop_words_ : set\n Terms that were ignored because they either:\n\n - occurred in too many documents (`max_df`)\n - occurred in too few documents (`min_df`)\n - were cut off by feature selection (`max_features`).\n\n This is only available if no vocabulary was given.\n\nSee Also\n--------\nCountVectorizer : Transforms text into a sparse matrix of n-gram counts.\n\nTfidfTransformer : Performs the TF-IDF transformation from a provided\n matrix of counts.\n\nNotes\n-----\nThe ``stop_words_`` attribute can get large and increase the model size\nwhen pickling. This attribute is provided only for introspection and can\nbe safely removed using delattr or set to None before pickling.\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import TfidfVectorizer\n>>> corpus = [\n... 'This is the first document.',\n... 'This document is the second document.',\n... 'And this is the third one.',\n... 'Is this the first document?',\n... 
]\n>>> vectorizer = TfidfVectorizer()\n>>> X = vectorizer.fit_transform(corpus)\n>>> print(vectorizer.get_feature_names())\n['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']\n>>> print(X.shape)\n(4, 9)" - } - ], - "functions": [ - { - "name": "_preprocess", - "decorators": [], - "parameters": [ - { - "name": "doc: str", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The string to preprocess" - }, - { - "name": "accent_function: callable", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Function for handling accented characters. Common strategies include normalizing and removing." - }, - { - "name": "default=None", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Function for handling accented characters. Common strategies include normalizing and removing." - }, - { - "name": "lower: bool", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to use str.lower to lowercase all fo the text" - }, - { - "name": "default=False", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to use str.lower to lowercase all fo the text" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Chain together an optional series of text preprocessing steps to\napply to a document.\n\nParameters\n----------\ndoc: str\n The string to preprocess\naccent_function: callable, default=None\n Function for handling accented characters. 
Common strategies include\n normalizing and removing.\nlower: bool, default=False\n Whether to use str.lower to lowercase all fo the text\n\nReturns\n-------\ndoc: str\n preprocessed string" - }, - { - "name": "_analyze", - "decorators": [], - "parameters": [ - { - "name": "analyzer: callable", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "default=None", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "tokenizer: callable", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "ngrams: callable", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "preprocessor: callable", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "decoder: callable", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "stop_words: list", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Chain together an optional series of text processing steps to go from\na single document to ngrams, with or without tokenizing or preprocessing.\n\nIf analyzer is used, only the decoder argument is used, as the analyzer is\nintended to replace the preprocessor, tokenizer, and ngrams steps.\n\nParameters\n----------\nanalyzer: callable, default=None\ntokenizer: callable, default=None\nngrams: callable, default=None\npreprocessor: callable, default=None\ndecoder: callable, default=None\nstop_words: list, default=None\n\nReturns\n-------\nngrams: 
list\n A sequence of tokens, possibly with pairs, triples, etc." - }, - { - "name": "strip_accents_unicode", - "decorators": [], - "parameters": [ - { - "name": "s", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The string to strip" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform accentuated unicode symbols into their simple counterpart\n\nWarning: the python-level loop and join operations make this\nimplementation 20 times slower than the strip_accents_ascii basic\nnormalization.\n\nParameters\n----------\ns : string\n The string to strip\n\nSee Also\n--------\nstrip_accents_ascii : Remove accentuated char for any unicode symbol that\n has a direct ASCII equivalent." - }, - { - "name": "strip_accents_ascii", - "decorators": [], - "parameters": [ - { - "name": "s", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The string to strip" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform accentuated unicode symbols into ascii or nothing\n\nWarning: this solution is only suited for languages that have a direct\ntransliteration to ASCII symbols.\n\nParameters\n----------\ns : string\n The string to strip\n\nSee Also\n--------\nstrip_accents_unicode : Remove accentuated char for any unicode symbol." 
- }, - { - "name": "strip_tags", - "decorators": [], - "parameters": [ - { - "name": "s", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The string to strip" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Basic regexp based HTML / XML tag stripper function\n\nFor serious HTML/XML preprocessing you should rather use an external\nlibrary such as lxml or BeautifulSoup.\n\nParameters\n----------\ns : string\n The string to strip" - }, - { - "name": "_check_stop_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_document_frequency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Count the number of non-zero values for each feature in sparse X." - }, - { - "name": "_make_int_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Construct an array.array of a type suitable for scipy.sparse indices." - } - ] - }, - { - "name": "sklearn.feature_extraction._dict_vectorizer", - "imports": [ - "from array import array", - "from collections.abc import Mapping", - "from collections.abc import Iterable", - "from operator import itemgetter", - "from numbers import Number", - "import numpy as np", - "import scipy.sparse as sp", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_array", - "from utils import tosequence", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "DictVectorizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "The type of feature values. 
Passed to Numpy array/scipy.sparse matrix constructors as the dtype argument." - }, - { - "name": "separator", - "type": "str", - "hasDefault": true, - "default": "\"", - "limitation": null, - "ignored": false, - "docstring": "Separator string used when constructing new features for one-hot coding." - }, - { - "name": "sparse", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether transform should produce scipy.sparse matrices." - }, - { - "name": "sort", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether ``feature_names_`` and ``vocabulary_`` should be sorted when fitting." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_add_iterable_element", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Add feature names for iterable of strings" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict(s) or Mapping(s) from feature names (arbitrary Python objects) to feature values (strings or convertible to dtype). .. versionchanged:: 0.24 Accepts multiple string values for one categorical feature." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn a list of feature name -> indices mappings.\n\nParameters\n----------\nX : Mapping or iterable over Mappings\n Dict(s) or Mapping(s) from feature names (arbitrary Python\n objects) to feature values (strings or convertible to dtype).\n\n .. 
versionchanged:: 0.24\n Accepts multiple string values for one categorical feature.\n\ny : (ignored)\n\nReturns\n-------\nself" - }, - { - "name": "_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict(s) or Mapping(s) from feature names (arbitrary Python objects) to feature values (strings or convertible to dtype). .. versionchanged:: 0.24 Accepts multiple string values for one categorical feature." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Learn a list of feature name -> indices mappings and transform X.\n\nLike fit(X) followed by transform(X), but does not require\nmaterializing X in memory.\n\nParameters\n----------\nX : Mapping or iterable over Mappings\n Dict(s) or Mapping(s) from feature names (arbitrary Python\n objects) to feature values (strings or convertible to dtype).\n\n .. versionchanged:: 0.24\n Accepts multiple string values for one categorical feature.\n\ny : (ignored)\n\nReturns\n-------\nXa : {array, sparse matrix}\n Feature vectors; always 2-d." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample matrix." - }, - { - "name": "dict_type", - "type": null, - "hasDefault": true, - "default": "dict", - "limitation": null, - "ignored": false, - "docstring": "Constructor for feature mappings. Must conform to the collections.Mapping API." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform array or sparse matrix X back to feature mappings.\n\nX must have been produced by this DictVectorizer's transform or\nfit_transform method; it may only have passed through transformers\nthat preserve the number of features and their order.\n\nIn the case of one-hot/one-of-K coding, the constructed feature\nnames and values are returned rather than the original ones.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Sample matrix.\ndict_type : type, default=dict\n Constructor for feature mappings. Must conform to the\n collections.Mapping API.\n\nReturns\n-------\nD : list of dict_type objects of shape (n_samples,)\n Feature mappings for the samples in X." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict(s) or Mapping(s) from feature names (arbitrary Python objects) to feature values (strings or convertible to dtype)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform feature->value dicts to array or sparse matrix.\n\nNamed features not encountered during fit or fit_transform will be\nsilently ignored.\n\nParameters\n----------\nX : Mapping or iterable over Mappings of shape (n_samples,)\n Dict(s) or Mapping(s) from feature names (arbitrary Python\n objects) to feature values (strings or convertible to dtype).\n\nReturns\n-------\nXa : {array, sparse matrix}\n Feature vectors; always 2-d." - }, - { - "name": "get_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns a list of feature names, ordered by their indices.\n\nIf one-of-K coding is applied to categorical features, this will\ninclude the constructed feature names but not the original ones." 
- }, - { - "name": "restrict", - "decorators": [], - "parameters": [ - { - "name": "support", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Boolean mask or list of indices (as returned by the get_support member of feature selectors)." - }, - { - "name": "indices", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether support is a list of indices." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Restrict the features to those in support using feature selection.\n\nThis function modifies the estimator in-place.\n\nParameters\n----------\nsupport : array-like\n Boolean mask or list of indices (as returned by the get_support\n member of feature selectors).\nindices : bool, default=False\n Whether support is a list of indices.\n\nReturns\n-------\nself\n\nExamples\n--------\n>>> from sklearn.feature_extraction import DictVectorizer\n>>> from sklearn.feature_selection import SelectKBest, chi2\n>>> v = DictVectorizer()\n>>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]\n>>> X = v.fit_transform(D)\n>>> support = SelectKBest(chi2, k=2).fit(X, [0, 1])\n>>> v.get_feature_names()\n['bar', 'baz', 'foo']\n>>> v.restrict(support.get_support())\nDictVectorizer()\n>>> v.get_feature_names()\n['bar', 'foo']" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transforms lists of feature-value mappings to vectors.\n\nThis transformer turns lists of mappings (dict-like objects) of feature\nnames to feature values into Numpy arrays or scipy.sparse matrices for use\nwith scikit-learn estimators.\n\nWhen feature values are strings, this transformer will do a binary one-hot\n(aka one-of-K) coding: one boolean-valued feature is constructed for each\nof the possible string values that the feature can 
take on. For instance,\na feature \"f\" that can take on the values \"ham\" and \"spam\" will become two\nfeatures in the output, one signifying \"f=ham\", the other \"f=spam\".\n\nIf a feature value is a sequence or set of strings, this transformer\nwill iterate over the values and will count the occurrences of each string\nvalue.\n\nHowever, note that this transformer will only do a binary one-hot encoding\nwhen feature values are of type string. If categorical features are\nrepresented as numeric values such as int or iterables of strings, the\nDictVectorizer can be followed by\n:class:`~sklearn.preprocessing.OneHotEncoder` to complete\nbinary one-hot encoding.\n\nFeatures that do not occur in a sample (mapping) will have a zero value\nin the resulting array/matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndtype : dtype, default=np.float64\n The type of feature values. Passed to Numpy array/scipy.sparse matrix\n constructors as the dtype argument.\nseparator : str, default=\"=\"\n Separator string used when constructing new features for one-hot\n coding.\nsparse : bool, default=True\n Whether transform should produce scipy.sparse matrices.\nsort : bool, default=True\n Whether ``feature_names_`` and ``vocabulary_`` should be\n sorted when fitting.\n\nAttributes\n----------\nvocabulary_ : dict\n A dictionary mapping feature names to feature indices.\n\nfeature_names_ : list\n A list of length n_features containing the feature names (e.g., \"f=ham\"\n and \"f=spam\").\n\nExamples\n--------\n>>> from sklearn.feature_extraction import DictVectorizer\n>>> v = DictVectorizer(sparse=False)\n>>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]\n>>> X = v.fit_transform(D)\n>>> X\narray([[2., 0., 1.],\n [0., 1., 3.]])\n>>> v.inverse_transform(X) == [{'bar': 2.0, 'foo': 1.0},\n... 
{'baz': 1.0, 'foo': 3.0}]\nTrue\n>>> v.transform({'foo': 4, 'unseen_feature': 3})\narray([[0., 0., 4.]])\n\nSee Also\n--------\nFeatureHasher : Performs vectorization using only a hash function.\nsklearn.preprocessing.OrdinalEncoder : Handles nominal/categorical\n features encoded as columns of arbitrary data types." - } - ], - "functions": [ - { - "name": "_tosequence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Turn X into a sequence or ndarray, avoiding a copy if possible." - } - ] - }, - { - "name": "sklearn.feature_extraction._hash", - "imports": [ - "import numbers", - "import numpy as np", - "import scipy.sparse as sp", - "from utils import IS_PYPY", - "from utils.validation import _deprecate_positional_args", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from _hashing_fast import transform as _hashing_transform" - ], - "classes": [ - { - "name": "FeatureHasher", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_features", - "type": "int", - "hasDefault": true, - "default": "2**20", - "limitation": null, - "ignored": false, - "docstring": "The number of features (columns) in the output matrices. Small numbers of features are likely to cause hash collisions, but large numbers will cause larger coefficient dimensions in linear learners." - }, - { - "name": "input_type", - "type": "Literal[\"dict\", \"pair\", \"string\"]", - "hasDefault": true, - "default": "\"dict\"", - "limitation": null, - "ignored": false, - "docstring": "Either \"dict\" (the default) to accept dictionaries over (feature_name, value); \"pair\" to accept pairs of (feature_name, value); or \"string\" to accept single strings. feature_name should be a string, while value should be a number. In the case of \"string\", a value of 1 is implied. The feature_name is hashed to find the appropriate column for the feature. 
The value's sign might be flipped in the output (but see non_negative, below)." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "The type of feature values. Passed to scipy.sparse matrix constructors as the dtype argument. Do not set this to bool, np.boolean or any unsigned integer type." - }, - { - "name": "alternate_sign", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "When True, an alternating sign is added to the features as to approximately conserve the inner product in the hashed space even for small n_features. This approach is similar to sparse random projection." - }, - { - "name": ".. versionchanged:: 0.19", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "``alternate_sign`` replaces the now deprecated ``non_negative`` parameter." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "No-op.\n\nThis method doesn't do anything. It exists purely for compatibility\nwith the scikit-learn transformer API.\n\nParameters\n----------\nX : ndarray\n\nReturns\n-------\nself : FeatureHasher" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "raw_X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples. 
Each sample must be iterable an (e.g., a list or tuple) containing/generating feature names (and optionally values, see the input_type constructor argument) which will be hashed. raw_X need not support the len function, so it can be the result of a generator; n_samples is determined on the fly." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform a sequence of instances to a scipy.sparse matrix.\n\nParameters\n----------\nraw_X : iterable over iterable over raw features, length = n_samples\n Samples. Each sample must be iterable an (e.g., a list or tuple)\n containing/generating feature names (and optionally values, see\n the input_type constructor argument) which will be hashed.\n raw_X need not support the len function, so it can be the result\n of a generator; n_samples is determined on the fly.\n\nReturns\n-------\nX : sparse matrix of shape (n_samples, n_features)\n Feature matrix, for use with estimators or further transformers." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Implements feature hashing, aka the hashing trick.\n\nThis class turns sequences of symbolic feature names (strings) into\nscipy.sparse matrices, using a hash function to compute the matrix column\ncorresponding to a name. The hash function employed is the signed 32-bit\nversion of Murmurhash3.\n\nFeature names of type byte string are used as-is. Unicode strings are\nconverted to UTF-8 first, but no Unicode normalization is done.\nFeature values must be (finite) numbers.\n\nThis class is a low-memory alternative to DictVectorizer and\nCountVectorizer, intended for large-scale (online) learning and situations\nwhere memory is tight, e.g. when running prediction code on embedded\ndevices.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.13\n\nParameters\n----------\nn_features : int, default=2**20\n The number of features (columns) in the output matrices. Small numbers\n of features are likely to cause hash collisions, but large numbers\n will cause larger coefficient dimensions in linear learners.\ninput_type : {\"dict\", \"pair\", \"string\"}, default=\"dict\"\n Either \"dict\" (the default) to accept dictionaries over\n (feature_name, value); \"pair\" to accept pairs of (feature_name, value);\n or \"string\" to accept single strings.\n feature_name should be a string, while value should be a number.\n In the case of \"string\", a value of 1 is implied.\n The feature_name is hashed to find the appropriate column for the\n feature. The value's sign might be flipped in the output (but see\n non_negative, below).\ndtype : numpy dtype, default=np.float64\n The type of feature values. Passed to scipy.sparse matrix constructors\n as the dtype argument. Do not set this to bool, np.boolean or any\n unsigned integer type.\nalternate_sign : bool, default=True\n When True, an alternating sign is added to the features as to\n approximately conserve the inner product in the hashed space even for\n small n_features. This approach is similar to sparse random projection.\n\n.. versionchanged:: 0.19\n ``alternate_sign`` replaces the now deprecated ``non_negative``\n parameter.\n\nExamples\n--------\n>>> from sklearn.feature_extraction import FeatureHasher\n>>> h = FeatureHasher(n_features=10)\n>>> D = [{'dog': 1, 'cat':2, 'elephant':4},{'dog': 2, 'run': 5}]\n>>> f = h.transform(D)\n>>> f.toarray()\narray([[ 0., 0., -4., -1., 0., 0., 0., 0., 0., 2.],\n [ 0., 0., 0., -2., -5., 0., 0., 0., 0., 0.]])\n\nSee Also\n--------\nDictVectorizer : Vectorizes string-valued features using a hash table.\nsklearn.preprocessing.OneHotEncoder : Handles nominal/categorical features." 
- } - ], - "functions": [ - { - "name": "_hashing_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iteritems", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Like d.iteritems, but accepts any collections.Mapping." - } - ] - }, - { - "name": "sklearn.feature_extraction._stop_words", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.feature_extraction", - "imports": [ - "from _dict_vectorizer import DictVectorizer", - "from _hash import FeatureHasher", - "from image import img_to_graph", - "from image import grid_to_graph", - "from None import text" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.feature_extraction.tests.test_dict_vectorizer", - "imports": [ - "from random import Random", - "import numpy as np", - "import scipy.sparse as sp", - "from numpy.testing import assert_array_equal", - "import pytest", - "from sklearn.feature_extraction import DictVectorizer", - "from sklearn.feature_selection import SelectKBest", - "from sklearn.feature_selection import chi2" - ], - "classes": [], - "functions": [ - { - "name": "test_dictvectorizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_selection", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_of_k", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterable_value", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterable_not_string_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_mapping_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unseen_or_no_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_deterministic_vocabulary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_extraction.tests.test_feature_hasher", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_array_equal", - "import pytest", - "from sklearn.feature_extraction import FeatureHasher", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import fails_if_pypy", - "from sklearn.feature_extraction._hashing_fast import transform as _hashing_transform" - ], - "classes": [], - "functions": [ - { - "name": "test_feature_hasher_dicts", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_hasher_strings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hashing_transform_seed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_hasher_pairs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_hasher_pairs_with_string_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hash_empty_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, 
- "returnType": "Any", - "docstring": null - }, - { - "name": "test_hasher_invalid_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hasher_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hasher_zeros", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hasher_alternate_sign", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hash_collisions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_extraction.tests.test_image", - "imports": [ - "import numpy as np", - "import scipy as sp", - "from scipy import ndimage", - "from scipy.sparse.csgraph import connected_components", - "import pytest", - "from sklearn.feature_extraction.image import img_to_graph", - "from sklearn.feature_extraction.image import grid_to_graph", - "from sklearn.feature_extraction.image import extract_patches_2d", - "from sklearn.feature_extraction.image import reconstruct_from_patches_2d", - "from sklearn.feature_extraction.image import PatchExtractor", - "from sklearn.feature_extraction.image import _extract_patches", - "from sklearn.utils._testing import ignore_warnings", - "from scipy import misc" - ], - "classes": [], - "functions": [ - { - "name": "test_img_to_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_to_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_connect_regions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_connect_regions_with_grid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_downsampled_face", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_orange_face", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_images", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_patches_all", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_patches_all_color", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_patches_all_rect", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_patches_max_patches", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_patch_same_size_image", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_patches_less_than_max_patches", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_reconstruct_patches_perfect", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_reconstruct_patches_perfect_color", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_patch_extractor_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_patch_extractor_max_patches", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_patch_extractor_max_patches_default", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_patch_extractor_all_patches", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_patch_extractor_color", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_patches_strided", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_extract_patches_square", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_width_patch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_extraction.tests.test_text", - "imports": [ - "from collections.abc import Mapping", - "import re", - "import pytest", - "from scipy import sparse", - "from sklearn.feature_extraction.text import strip_tags", - "from sklearn.feature_extraction.text import strip_accents_unicode", - "from sklearn.feature_extraction.text import strip_accents_ascii", - "from sklearn.feature_extraction.text import HashingVectorizer", - "from sklearn.feature_extraction.text import CountVectorizer", - "from sklearn.feature_extraction.text import TfidfTransformer", - "from sklearn.feature_extraction.text import TfidfVectorizer", - "from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS", - "from 
sklearn.model_selection import train_test_split", - "from sklearn.model_selection import cross_val_score", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.pipeline import Pipeline", - "from sklearn.svm import LinearSVC", - "from sklearn.base import clone", - "import numpy as np", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal", - "from sklearn.utils import IS_PYPY", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import fails_if_pypy", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import skip_if_32bit", - "from collections import defaultdict", - "from functools import partial", - "import pickle", - "from io import StringIO" - ], - "classes": [], - "functions": [ - { - "name": "uppercase", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "strip_eacute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split_tokenize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "lazy_analyze", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_strip_accents", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_to_ascii", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_word_analyzer_unigrams", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_word_analyzer_unigrams_and_bigrams", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unicode_decode_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_char_ngram_analyzer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_char_wb_ngram_analyzer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_word_ngram_analyzer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_custom_vocabulary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_custom_vocabulary_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_custom_vocabulary_repeated_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_custom_vocabulary_gap_index", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_stop_words", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_empty_vocabulary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_countvectorizer_twice", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_custom_token_pattern", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check `get_feature_names()` when a custom token pattern is passed.\nNon-regression test for:\nhttps://github.com/scikit-learn/scikit-learn/issues/12971" - }, - { - "name": "test_countvectorizer_custom_token_pattern_with_several_group", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that we raise an error if token pattern capture several groups.\nNon-regression test for:\nhttps://github.com/scikit-learn/scikit-learn/issues/12971" - }, - { - "name": "test_tf_idf_smoothing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidf_no_smoothing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sublinear_tf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidf_vectorizer_setters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hashing_vectorizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_max_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "test_count_vectorizer_max_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_max_df", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_min_df", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_count_binary_occurrences", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hashed_binary_occurrences", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_count_vectorizer_pipeline_grid_selection", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_pipeline_grid_selection", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_pipeline_cross_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_unicode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidf_vectorizer_with_fixed_vocabulary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickling_vectorizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_pickling_built_processors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Tokenizers cannot be pickled\nhttps://github.com/scikit-learn/scikit-learn/issues/12833" - }, - { - "name": "test_countvectorizer_vocab_sets_when_pickling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_vocab_dicts_when_pickling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stop_words_removal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickling_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transformer_idf_setter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidf_vectorizer_setter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidfvectorizer_invalid_idf_attr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_unique_vocab", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hashingvectorizer_nan_in_docs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidfvectorizer_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidfvectorizer_export_idf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "test_vectorizer_vocab_clone", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_string_object_as_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidf_transformer_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidf_transformer_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tfidf_vectorizer_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizers_invalid_ngram_range", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_stop_words_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vectorizer_stop_words_inconsistent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_countvectorizer_sort_features_64bit_sparse_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that CountVectorizer._sort_features preserves the dtype of its sparse\nfeature matrix.\n\nThis test is skipped on 32bit platforms, see:\n https://github.com/scikit-learn/scikit-learn/pull/11295\nfor more details." 
- }, - { - "name": "test_stop_word_validation_custom_preprocessor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_analyzer_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_analyzer_change_behavior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_analyzer_reraise_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unused_parameters_warn", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tie_breaking_sample_order_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_extraction.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.feature_selection._base", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "from warnings import warn", - "from operator import attrgetter", - "import numpy as np", - "from scipy.sparse import issparse", - "from scipy.sparse import csc_matrix", - "from base import TransformerMixin", - "from utils import check_array", - "from utils import safe_mask", - "from utils import safe_sqr", - "from utils._tags import _safe_tags" - ], - "classes": [ - { - "name": "SelectorMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - 
}, - { - "name": "get_support", - "decorators": [], - "parameters": [ - { - "name": "indices", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the return value will be an array of integers, rather than a boolean mask." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get a mask, or integer index, of the features selected\n\nParameters\n----------\nindices : bool, default=False\n If True, the return value will be an array of integers, rather\n than a boolean mask.\n\nReturns\n-------\nsupport : array\n An index that selects the retained features from a feature vector.\n If `indices` is False, this is a boolean array of shape\n [# input features], in which an element is True iff its\n corresponding feature is selected for retention. If `indices` is\n True, this is an integer array of shape [# output features] whose\n values are indices into the input feature vector." - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Get the boolean mask indicating which features are selected\n\nReturns\n-------\nsupport : boolean array of shape [# input features]\n An element is True iff its corresponding feature is selected for\n retention." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reduce X to the selected features.\n\nParameters\n----------\nX : array of shape [n_samples, n_features]\n The input samples.\n\nReturns\n-------\nX_r : array of shape [n_samples, n_selected_features]\n The input samples with only the selected features." 
- }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reverse the transformation operation\n\nParameters\n----------\nX : array of shape [n_samples, n_selected_features]\n The input samples.\n\nReturns\n-------\nX_r : array of shape [n_samples, n_original_features]\n `X` with columns of zeros inserted where features would have\n been removed by :meth:`transform`." - } - ], - "docstring": "Transformer mixin that performs feature selection given a support mask\n\nThis mixin provides a feature selector implementation with `transform` and\n`inverse_transform` functionality given an implementation of\n`_get_support_mask`." - } - ], - "functions": [ - { - "name": "_get_feature_importances", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A scikit-learn estimator from which we want to get the feature importances." - }, - { - "name": "getter", - "type": "Union[Callable, str, Literal[\"auto\"]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An attribute or a callable to get the feature importance. If `\"auto\"`, `estimator` is expected to expose `coef_` or `feature_importances`." - }, - { - "name": "transform_func", - "type": "Literal[\"norm\", \"square\"]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The transform to apply to the feature importances. By default (`None`) no transformation is applied." 
- }, - { - "name": "norm_order", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The norm order to apply when `transform_func=\"norm\"`. Only applied when `importances.ndim > 1`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Retrieve and aggregate (ndim > 1) the feature importances\nfrom an estimator. Also optionally applies transformation.\n\nParameters\n----------\nestimator : estimator\n A scikit-learn estimator from which we want to get the feature\n importances.\n\ngetter : \"auto\", str or callable\n An attribute or a callable to get the feature importance. If `\"auto\"`,\n `estimator` is expected to expose `coef_` or `feature_importances`.\n\ntransform_func : {\"norm\", \"square\"}, default=None\n The transform to apply to the feature importances. By default (`None`)\n no transformation is applied.\n\nnorm_order : int, default=1\n The norm order to apply when `transform_func=\"norm\"`. Only applied\n when `importances.ndim > 1`.\n\nReturns\n-------\nimportances : ndarray of shape (n_features,)\n The features importances, optionally transformed." 
- } - ] - }, - { - "name": "sklearn.feature_selection._from_model", - "imports": [ - "import numpy as np", - "import numbers", - "from _base import SelectorMixin", - "from _base import _get_feature_importances", - "from base import BaseEstimator", - "from base import clone", - "from base import MetaEstimatorMixin", - "from utils._tags import _safe_tags", - "from utils.validation import check_is_fitted", - "from exceptions import NotFittedError", - "from utils.metaestimators import if_delegate_has_method", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "SelectFromModel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base estimator from which the transformer is built. This can be both a fitted (if ``prefit`` is set to True) or a non-fitted estimator. The estimator must have either a ``feature_importances_`` or ``coef_`` attribute after fitting." - }, - { - "name": "threshold", - "type": "Union[float, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The threshold value to use for feature selection. Features whose importance is greater or equal are kept while the others are discarded. If \"median\" (resp. \"mean\"), then the ``threshold`` value is the median (resp. the mean) of the feature importances. A scaling factor (e.g., \"1.25*mean\") may also be used. If None and if the estimator has a parameter penalty set to l1, either explicitly or implicitly (e.g, Lasso), the threshold used is 1e-5. Otherwise, \"mean\" is used by default." 
- }, - { - "name": "prefit", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether a prefit model is expected to be passed into the constructor directly or not. If True, ``transform`` must be called directly and SelectFromModel cannot be used with ``cross_val_score``, ``GridSearchCV`` and similar utilities that clone the estimator. Otherwise train the model using ``fit`` and then ``transform`` to do feature selection." - }, - { - "name": "norm_order", - "type": null, - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Order of the norm used to filter the vectors of coefficients below ``threshold`` in the case where the ``coef_`` attribute of the estimator is of dimension 2." - }, - { - "name": "max_features", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of features to select. To only select based on ``max_features``, set ``threshold=-np.inf``. .. versionadded:: 0.20" - }, - { - "name": "importance_getter", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If 'auto', uses the feature importance either through a ``coef_`` attribute or ``feature_importances_`` attribute of estimator. Also accepts a string that specifies an attribute name/path for extracting feature importance (implemented with `attrgetter`). For example, give `regressor_.coef_` in case of :class:`~sklearn.compose.TransformedTargetRegressor` or `named_steps.clf.feature_importances_` in case of :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`. If `callable`, overrides the default feature importance getter. The callable is passed with the fitted estimator and it should return importance for each feature. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (integers that correspond to classes in classification, real numbers in regression)." - }, - { - "name": "**fit_params", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the SelectFromModel meta-transformer.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,), default=None\n The target values (integers that correspond to classes in\n classification, real numbers in regression).\n\n**fit_params : Other estimator specific parameters\n\nReturns\n-------\nself : object" - }, - { - "name": "threshold_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (integers that correspond to classes in classification, real numbers in regression)." - }, - { - "name": "**fit_params", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the SelectFromModel meta-transformer only once.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,), default=None\n The target values (integers that correspond to classes in\n classification, real numbers in regression).\n\n**fit_params : Other estimator specific parameters\n\nReturns\n-------\nself : object" - }, - { - "name": "n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Meta-transformer for selecting features based on importance weights.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : object\n The base estimator from which the transformer is built.\n This can be both a fitted (if ``prefit`` is set to True)\n or a non-fitted estimator. The estimator must have either a\n ``feature_importances_`` or ``coef_`` attribute after fitting.\n\nthreshold : string or float, default=None\n The threshold value to use for feature selection. Features whose\n importance is greater or equal are kept while the others are\n discarded. If \"median\" (resp. \"mean\"), then the ``threshold`` value is\n the median (resp. the mean) of the feature importances. A scaling\n factor (e.g., \"1.25*mean\") may also be used. 
If None and if the\n estimator has a parameter penalty set to l1, either explicitly\n or implicitly (e.g, Lasso), the threshold used is 1e-5.\n Otherwise, \"mean\" is used by default.\n\nprefit : bool, default=False\n Whether a prefit model is expected to be passed into the constructor\n directly or not. If True, ``transform`` must be called directly\n and SelectFromModel cannot be used with ``cross_val_score``,\n ``GridSearchCV`` and similar utilities that clone the estimator.\n Otherwise train the model using ``fit`` and then ``transform`` to do\n feature selection.\n\nnorm_order : non-zero int, inf, -inf, default=1\n Order of the norm used to filter the vectors of coefficients below\n ``threshold`` in the case where the ``coef_`` attribute of the\n estimator is of dimension 2.\n\nmax_features : int, default=None\n The maximum number of features to select.\n To only select based on ``max_features``, set ``threshold=-np.inf``.\n\n .. versionadded:: 0.20\n\nimportance_getter : str or callable, default='auto'\n If 'auto', uses the feature importance either through a ``coef_``\n attribute or ``feature_importances_`` attribute of estimator.\n\n Also accepts a string that specifies an attribute name/path\n for extracting feature importance (implemented with `attrgetter`).\n For example, give `regressor_.coef_` in case of\n :class:`~sklearn.compose.TransformedTargetRegressor` or\n `named_steps.clf.feature_importances_` in case of\n :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n If `callable`, overrides the default feature importance getter.\n The callable is passed with the fitted estimator and it should\n return importance for each feature.\n\n .. 
versionadded:: 0.24\n\nAttributes\n----------\nestimator_ : an estimator\n The base estimator from which the transformer is built.\n This is stored only when a non-fitted estimator is passed to the\n ``SelectFromModel``, i.e when prefit is False.\n\nthreshold_ : float\n The threshold value used for feature selection.\n\nNotes\n-----\nAllows NaN/Inf in the input if the underlying estimator does as well.\n\nExamples\n--------\n>>> from sklearn.feature_selection import SelectFromModel\n>>> from sklearn.linear_model import LogisticRegression\n>>> X = [[ 0.87, -1.34, 0.31 ],\n... [-2.79, -0.02, -0.85 ],\n... [-1.34, -0.48, -2.55 ],\n... [ 1.92, 1.48, 0.65 ]]\n>>> y = [0, 1, 0, 1]\n>>> selector = SelectFromModel(estimator=LogisticRegression()).fit(X, y)\n>>> selector.estimator_.coef_\narray([[-0.3252302 , 0.83462377, 0.49750423]])\n>>> selector.threshold_\n0.55245...\n>>> selector.get_support()\narray([False, True, False])\n>>> selector.transform(X)\narray([[-1.34],\n [-0.02],\n [-0.48],\n [ 1.48]])\n\nSee Also\n--------\nRFE : Recursive feature elimination based on importance weights.\nRFECV : Recursive feature elimination with built-in cross-validated\n selection of the best number of features.\nSequentialFeatureSelector : Sequential cross-validation based feature\n selection. Does not rely on importance weights." 
- } - ], - "functions": [ - { - "name": "_calculate_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Interpret the threshold value" - } - ] - }, - { - "name": "sklearn.feature_selection._mutual_info", - "imports": [ - "import numpy as np", - "from scipy.sparse import issparse", - "from scipy.special import digamma", - "from metrics.cluster import mutual_info_score", - "from neighbors import NearestNeighbors", - "from neighbors import KDTree", - "from preprocessing import scale", - "from utils import check_random_state", - "from utils.fixes import _astype_copy_false", - "from utils.validation import check_array", - "from utils.validation import check_X_y", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import check_classification_targets" - ], - "classes": [], - "functions": [ - { - "name": "_compute_mi_cc", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples of two continuous random variables, must have an identical shape." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples of two continuous random variables, must have an identical shape." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of nearest neighbors to search for each point, see [1]_." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute mutual information between two continuous variables.\n\nParameters\n----------\nx, y : ndarray, shape (n_samples,)\n Samples of two continuous random variables, must have an identical\n shape.\n\nn_neighbors : int\n Number of nearest neighbors to search for each point, see [1]_.\n\nReturns\n-------\nmi : float\n Estimated mutual information. If it turned out to be negative it is\n replace by 0.\n\nNotes\n-----\nTrue mutual information can't be negative. If its estimate by a numerical\nmethod is negative, it means (providing the method is adequate) that the\nmutual information is close to 0 and replacing it by 0 is a reasonable\nstrategy.\n\nReferences\n----------\n.. [1] A. Kraskov, H. Stogbauer and P. Grassberger, \"Estimating mutual\n information\". Phys. Rev. E 69, 2004." - }, - { - "name": "_compute_mi_cd", - "decorators": [], - "parameters": [ - { - "name": "c", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples of a continuous random variable." - }, - { - "name": "d", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples of a discrete random variable." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of nearest neighbors to search for each point, see [1]_." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute mutual information between continuous and discrete variables.\n\nParameters\n----------\nc : ndarray, shape (n_samples,)\n Samples of a continuous random variable.\n\nd : ndarray, shape (n_samples,)\n Samples of a discrete random variable.\n\nn_neighbors : int\n Number of nearest neighbors to search for each point, see [1]_.\n\nReturns\n-------\nmi : float\n Estimated mutual information. If it turned out to be negative it is\n replace by 0.\n\nNotes\n-----\nTrue mutual information can't be negative. If its estimate by a numerical\nmethod is negative, it means (providing the method is adequate) that the\nmutual information is close to 0 and replacing it by 0 is a reasonable\nstrategy.\n\nReferences\n----------\n.. [1] B. C. Ross \"Mutual Information between Discrete and Continuous\n Data Sets\". PLoS ONE 9(2), 2014." - }, - { - "name": "_compute_mi", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute mutual information between two variables.\n\nThis is a simple wrapper which selects a proper function to call based on\nwhether `x` and `y` are discrete or not." - }, - { - "name": "_iterate_columns", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix over which to iterate." - }, - { - "name": "columns", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of columns to iterate over. If None, iterate over all columns." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Iterate over columns of a matrix.\n\nParameters\n----------\nX : ndarray or csc_matrix, shape (n_samples, n_features)\n Matrix over which to iterate.\n\ncolumns : iterable or None, default=None\n Indices of columns to iterate over. 
If None, iterate over all columns.\n\nYields\n------\nx : ndarray, shape (n_samples,)\n Columns of `X` in dense format." - }, - { - "name": "_estimate_mi", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Feature matrix." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector." - }, - { - "name": "discrete_features", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If bool, then determines whether to consider all features discrete or continuous. If array, then it should be either a boolean mask with shape (n_features,) or array with indices of discrete features. If 'auto', it is assigned to False for dense `X` and to True for sparse `X`." - }, - { - "name": "discrete_target", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to consider `y` as a discrete variable." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to use for MI estimation for continuous variables, see [1]_ and [2]_. Higher values reduce variance of the estimation, but could introduce a bias." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to make a copy of the given data. If set to False, the initial data will be overwritten." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for adding small noise to continuous variables in order to remove repeated values. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate mutual information between the features and the target.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Feature matrix.\n\ny : array-like of shape (n_samples,)\n Target vector.\n\ndiscrete_features : {'auto', bool, array-like}, default='auto'\n If bool, then determines whether to consider all features discrete\n or continuous. If array, then it should be either a boolean mask\n with shape (n_features,) or array with indices of discrete features.\n If 'auto', it is assigned to False for dense `X` and to True for\n sparse `X`.\n\ndiscrete_target : bool, default=False\n Whether to consider `y` as a discrete variable.\n\nn_neighbors : int, default=3\n Number of neighbors to use for MI estimation for continuous variables,\n see [1]_ and [2]_. Higher values reduce variance of the estimation, but\n could introduce a bias.\n\ncopy : bool, default=True\n Whether to make a copy of the given data. If set to False, the initial\n data will be overwritten.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for adding small noise to\n continuous variables in order to remove repeated values.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nmi : ndarray, shape (n_features,)\n Estimated mutual information between each feature and the target.\n A negative value will be replaced by 0.\n\nReferences\n----------\n.. [1] A. Kraskov, H. Stogbauer and P. 
Grassberger, \"Estimating mutual\n information\". Phys. Rev. E 69, 2004.\n.. [2] B. C. Ross \"Mutual Information between Discrete and Continuous\n Data Sets\". PLoS ONE 9(2), 2014." - }, - { - "name": "mutual_info_regression", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Feature matrix." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector." - }, - { - "name": "discrete_features", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If bool, then determines whether to consider all features discrete or continuous. If array, then it should be either a boolean mask with shape (n_features,) or array with indices of discrete features. If 'auto', it is assigned to False for dense `X` and to True for sparse `X`." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to use for MI estimation for continuous variables, see [2]_ and [3]_. Higher values reduce variance of the estimation, but could introduce a bias." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to make a copy of the given data. If set to False, the initial data will be overwritten." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for adding small noise to continuous variables in order to remove repeated values. Pass an int for reproducible results across multiple function calls. 
See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate mutual information for a continuous target variable.\n\nMutual information (MI) [1]_ between two random variables is a non-negative\nvalue, which measures the dependency between the variables. It is equal\nto zero if and only if two random variables are independent, and higher\nvalues mean higher dependency.\n\nThe function relies on nonparametric methods based on entropy estimation\nfrom k-nearest neighbors distances as described in [2]_ and [3]_. Both\nmethods are based on the idea originally proposed in [4]_.\n\nIt can be used for univariate features selection, read more in the\n:ref:`User Guide `.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Feature matrix.\n\ny : array-like of shape (n_samples,)\n Target vector.\n\ndiscrete_features : {'auto', bool, array-like}, default='auto'\n If bool, then determines whether to consider all features discrete\n or continuous. If array, then it should be either a boolean mask\n with shape (n_features,) or array with indices of discrete features.\n If 'auto', it is assigned to False for dense `X` and to True for\n sparse `X`.\n\nn_neighbors : int, default=3\n Number of neighbors to use for MI estimation for continuous variables,\n see [2]_ and [3]_. Higher values reduce variance of the estimation, but\n could introduce a bias.\n\ncopy : bool, default=True\n Whether to make a copy of the given data. If set to False, the initial\n data will be overwritten.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for adding small noise to\n continuous variables in order to remove repeated values.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nmi : ndarray, shape (n_features,)\n Estimated mutual information between each feature and the target.\n\nNotes\n-----\n1. 
The term \"discrete features\" is used instead of naming them\n \"categorical\", because it describes the essence more accurately.\n For example, pixel intensities of an image are discrete features\n (but hardly categorical) and you will get better results if mark them\n as such. Also note, that treating a continuous variable as discrete and\n vice versa will usually give incorrect results, so be attentive about\n that.\n2. True mutual information can't be negative. If its estimate turns out\n to be negative, it is replaced by zero.\n\nReferences\n----------\n.. [1] `Mutual Information\n `_\n on Wikipedia.\n.. [2] A. Kraskov, H. Stogbauer and P. Grassberger, \"Estimating mutual\n information\". Phys. Rev. E 69, 2004.\n.. [3] B. C. Ross \"Mutual Information between Discrete and Continuous\n Data Sets\". PLoS ONE 9(2), 2014.\n.. [4] L. F. Kozachenko, N. N. Leonenko, \"Sample Estimate of the Entropy\n of a Random Vector\", Probl. Peredachi Inf., 23:2 (1987), 9-16" - }, - { - "name": "mutual_info_classif", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Feature matrix." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector." - }, - { - "name": "discrete_features", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If bool, then determines whether to consider all features discrete or continuous. If array, then it should be either a boolean mask with shape (n_features,) or array with indices of discrete features. If 'auto', it is assigned to False for dense `X` and to True for sparse `X`." 
- }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to use for MI estimation for continuous variables, see [2]_ and [3]_. Higher values reduce variance of the estimation, but could introduce a bias." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to make a copy of the given data. If set to False, the initial data will be overwritten." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for adding small noise to continuous variables in order to remove repeated values. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate mutual information for a discrete target variable.\n\nMutual information (MI) [1]_ between two random variables is a non-negative\nvalue, which measures the dependency between the variables. It is equal\nto zero if and only if two random variables are independent, and higher\nvalues mean higher dependency.\n\nThe function relies on nonparametric methods based on entropy estimation\nfrom k-nearest neighbors distances as described in [2]_ and [3]_. Both\nmethods are based on the idea originally proposed in [4]_.\n\nIt can be used for univariate features selection, read more in the\n:ref:`User Guide `.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Feature matrix.\n\ny : array-like of shape (n_samples,)\n Target vector.\n\ndiscrete_features : {'auto', bool, array-like}, default='auto'\n If bool, then determines whether to consider all features discrete\n or continuous. 
If array, then it should be either a boolean mask\n with shape (n_features,) or array with indices of discrete features.\n If 'auto', it is assigned to False for dense `X` and to True for\n sparse `X`.\n\nn_neighbors : int, default=3\n Number of neighbors to use for MI estimation for continuous variables,\n see [2]_ and [3]_. Higher values reduce variance of the estimation, but\n could introduce a bias.\n\ncopy : bool, default=True\n Whether to make a copy of the given data. If set to False, the initial\n data will be overwritten.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for adding small noise to\n continuous variables in order to remove repeated values.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nmi : ndarray, shape (n_features,)\n Estimated mutual information between each feature and the target.\n\nNotes\n-----\n1. The term \"discrete features\" is used instead of naming them\n \"categorical\", because it describes the essence more accurately.\n For example, pixel intensities of an image are discrete features\n (but hardly categorical) and you will get better results if mark them\n as such. Also note, that treating a continuous variable as discrete and\n vice versa will usually give incorrect results, so be attentive about\n that.\n2. True mutual information can't be negative. If its estimate turns out\n to be negative, it is replaced by zero.\n\nReferences\n----------\n.. [1] `Mutual Information\n `_\n on Wikipedia.\n.. [2] A. Kraskov, H. Stogbauer and P. Grassberger, \"Estimating mutual\n information\". Phys. Rev. E 69, 2004.\n.. [3] B. C. Ross \"Mutual Information between Discrete and Continuous\n Data Sets\". PLoS ONE 9(2), 2014.\n.. [4] L. F. Kozachenko, N. N. Leonenko, \"Sample Estimate of the Entropy\n of a Random Vector:, Probl. 
Peredachi Inf., 23:2 (1987), 9-16" - } - ] - }, - { - "name": "sklearn.feature_selection._rfe", - "imports": [ - "import numpy as np", - "import numbers", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "from utils.metaestimators import if_delegate_has_method", - "from utils.metaestimators import _safe_split", - "from utils._tags import _safe_tags", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from base import BaseEstimator", - "from base import MetaEstimatorMixin", - "from base import clone", - "from base import is_classifier", - "from model_selection import check_cv", - "from model_selection._validation import _score", - "from metrics import check_scoring", - "from _base import SelectorMixin", - "from _base import _get_feature_importances" - ], - "classes": [ - { - "name": "RFE", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A supervised learning estimator with a ``fit`` method that provides information about feature importance (e.g. `coef_`, `feature_importances_`)." - }, - { - "name": "n_features_to_select", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features to select. If `None`, half of the features are selected. If integer, the parameter is the absolute number of features to select. If float between 0 and 1, it is the fraction of features to select. .. versionchanged:: 0.24 Added float values for fractions." 
- }, - { - "name": "step", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "If greater than or equal to 1, then ``step`` corresponds to the (integer) number of features to remove at each iteration. If within (0.0, 1.0), then ``step`` corresponds to the percentage (rounded down) of features to remove at each iteration." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls verbosity of output." - }, - { - "name": "importance_getter", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If 'auto', uses the feature importance either through a `coef_` or `feature_importances_` attributes of estimator. Also accepts a string that specifies an attribute name/path for extracting feature importance (implemented with `attrgetter`). For example, give `regressor_.coef_` in case of :class:`~sklearn.compose.TransformedTargetRegressor` or `named_steps.clf.feature_importances_` in case of class:`~sklearn.pipeline.Pipeline` with its last step named `clf`. If `callable`, overrides the default feature importance getter. The callable is passed with the fitted estimator and it should return importance for each feature. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_estimator_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the RFE model and then the underlying estimator on the selected\n features.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,)\n The target values." - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reduce X to the selected features and then predict using the\n underlying estimator.\n\nParameters\n----------\nX : array of shape [n_samples, n_features]\n The input samples.\n\nReturns\n-------\ny : array of shape [n_samples]\n The predicted target values." 
- }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - }, - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reduce X to the selected features and then return the score of the\n underlying estimator.\n\nParameters\n----------\nX : array of shape [n_samples, n_features]\n The input samples.\n\ny : array of shape [n_samples]\n The target values." - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the decision function of ``X``.\n\nParameters\n----------\nX : {array-like or sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nscore : array, shape = [n_samples, n_classes] or [n_samples]\n The decision function of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n Regression and binary classification produce an array of shape\n [n_samples]." 
- }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities for X.\n\nParameters\n----------\nX : {array-like or sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\np : array of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class log-probabilities for X.\n\nParameters\n----------\nX : array of shape [n_samples, n_features]\n The input samples.\n\nReturns\n-------\np : array of shape (n_samples, n_classes)\n The class log-probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Feature ranking with recursive feature elimination.\n\nGiven an external estimator that assigns weights to features (e.g., the\ncoefficients of a linear model), the goal of recursive feature elimination\n(RFE) is to select features by recursively considering smaller and smaller\nsets of features. First, the estimator is trained on the initial set of\nfeatures and the importance of each feature is obtained either through\nany specific attribute or callable.\nThen, the least important features are pruned from current set of features.\nThat procedure is recursively repeated on the pruned set until the desired\nnumber of features to select is eventually reached.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : ``Estimator`` instance\n A supervised learning estimator with a ``fit`` method that provides\n information about feature importance\n (e.g. `coef_`, `feature_importances_`).\n\nn_features_to_select : int or float, default=None\n The number of features to select. If `None`, half of the features are\n selected. If integer, the parameter is the absolute number of features\n to select. If float between 0 and 1, it is the fraction of features to\n select.\n\n .. 
versionchanged:: 0.24\n Added float values for fractions.\n\nstep : int or float, default=1\n If greater than or equal to 1, then ``step`` corresponds to the\n (integer) number of features to remove at each iteration.\n If within (0.0, 1.0), then ``step`` corresponds to the percentage\n (rounded down) of features to remove at each iteration.\n\nverbose : int, default=0\n Controls verbosity of output.\n\nimportance_getter : str or callable, default='auto'\n If 'auto', uses the feature importance either through a `coef_`\n or `feature_importances_` attributes of estimator.\n\n Also accepts a string that specifies an attribute name/path\n for extracting feature importance (implemented with `attrgetter`).\n For example, give `regressor_.coef_` in case of\n :class:`~sklearn.compose.TransformedTargetRegressor` or\n `named_steps.clf.feature_importances_` in case of\n class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n If `callable`, overrides the default feature importance getter.\n The callable is passed with the fitted estimator and it should\n return importance for each feature.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nestimator_ : ``Estimator`` instance\n The fitted estimator used to select features.\n\nn_features_ : int\n The number of selected features.\n\nranking_ : ndarray of shape (n_features,)\n The feature ranking, such that ``ranking_[i]`` corresponds to the\n ranking position of the i-th feature. 
Selected (i.e., estimated\n best) features are assigned rank 1.\n\nsupport_ : ndarray of shape (n_features,)\n The mask of selected features.\n\nExamples\n--------\nThe following example shows how to retrieve the 5 most informative\nfeatures in the Friedman #1 dataset.\n\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.feature_selection import RFE\n>>> from sklearn.svm import SVR\n>>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)\n>>> estimator = SVR(kernel=\"linear\")\n>>> selector = RFE(estimator, n_features_to_select=5, step=1)\n>>> selector = selector.fit(X, y)\n>>> selector.support_\narray([ True, True, True, True, True, False, False, False, False,\n False])\n>>> selector.ranking_\narray([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])\n\nNotes\n-----\nAllows NaN/Inf in the input if the underlying estimator does as well.\n\nSee Also\n--------\nRFECV : Recursive feature elimination with built-in cross-validated\n selection of the best number of features.\nSelectFromModel : Feature selection based on thresholds of importance\n weights.\nSequentialFeatureSelector : Sequential cross-validation based feature\n selection. Does not rely on importance weights.\n\nReferences\n----------\n\n.. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., \"Gene selection\n for cancer classification using support vector machines\",\n Mach. Learn., 46(1-3), 389--422, 2002." - }, - { - "name": "RFECV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A supervised learning estimator with a ``fit`` method that provides information about feature importance either through a ``coef_`` attribute or through a ``feature_importances_`` attribute." 
- }, - { - "name": "step", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "If greater than or equal to 1, then ``step`` corresponds to the (integer) number of features to remove at each iteration. If within (0.0, 1.0), then ``step`` corresponds to the percentage (rounded down) of features to remove at each iteration. Note that the last iteration may remove fewer than ``step`` features in order to reach ``min_features_to_select``." - }, - { - "name": "min_features_to_select", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of features to be selected. This number of features will always be scored, even if the difference between the original feature count and ``min_features_to_select`` isn't divisible by ``step``. .. versionadded:: 0.20" - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if ``y`` is binary or multiclass, :class:`~sklearn.model_selection.StratifiedKFold` is used. If the estimator is a classifier or if ``y`` is neither binary nor multiclass, :class:`~sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value of None changed from 3-fold to 5-fold." 
- }, - { - "name": "scoring", - "type": "Optional[Union[Callable, str]]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls verbosity of output." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of cores to run in parallel while fitting across folds. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionadded:: 0.18" - }, - { - "name": "importance_getter", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If 'auto', uses the feature importance either through a `coef_` or `feature_importances_` attributes of estimator. Also accepts a string that specifies an attribute name/path for extracting feature importance. For example, give `regressor_.coef_` in case of :class:`~sklearn.compose.TransformedTargetRegressor` or `named_steps.clf.feature_importances_` in case of :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`. If `callable`, overrides the default feature importance getter. The callable is passed with the fitted estimator and it should return importance for each feature. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where `n_samples` is the number of samples and `n_features` is the total number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values (integers for classification, real numbers for regression)." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a \"Group\" :term:`cv` instance (e.g., :class:`~sklearn.model_selection.GroupKFold`). .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the RFE model and automatically tune the number of selected\n features.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where `n_samples` is the number of samples and\n `n_features` is the total number of features.\n\ny : array-like of shape (n_samples,)\n Target values (integers for classification, real numbers for\n regression).\n\ngroups : array-like of shape (n_samples,) or None, default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n .. 
versionadded:: 0.20" - } - ], - "docstring": "Feature ranking with recursive feature elimination and cross-validated\nselection of the best number of features.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : ``Estimator`` instance\n A supervised learning estimator with a ``fit`` method that provides\n information about feature importance either through a ``coef_``\n attribute or through a ``feature_importances_`` attribute.\n\nstep : int or float, default=1\n If greater than or equal to 1, then ``step`` corresponds to the\n (integer) number of features to remove at each iteration.\n If within (0.0, 1.0), then ``step`` corresponds to the percentage\n (rounded down) of features to remove at each iteration.\n Note that the last iteration may remove fewer than ``step`` features in\n order to reach ``min_features_to_select``.\n\nmin_features_to_select : int, default=1\n The minimum number of features to be selected. This number of features\n will always be scored, even if the difference between the original\n feature count and ``min_features_to_select`` isn't divisible by\n ``step``.\n\n .. versionadded:: 0.20\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if ``y`` is binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used. If the\n estimator is a classifier or if ``y`` is neither binary nor multiclass,\n :class:`~sklearn.model_selection.KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. 
versionchanged:: 0.22\n ``cv`` default value of None changed from 3-fold to 5-fold.\n\nscoring : string, callable or None, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\nverbose : int, default=0\n Controls verbosity of output.\n\nn_jobs : int or None, default=None\n Number of cores to run in parallel while fitting across folds.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.18\n\nimportance_getter : str or callable, default='auto'\n If 'auto', uses the feature importance either through a `coef_`\n or `feature_importances_` attributes of estimator.\n\n Also accepts a string that specifies an attribute name/path\n for extracting feature importance.\n For example, give `regressor_.coef_` in case of\n :class:`~sklearn.compose.TransformedTargetRegressor` or\n `named_steps.clf.feature_importances_` in case of\n :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n If `callable`, overrides the default feature importance getter.\n The callable is passed with the fitted estimator and it should\n return importance for each feature.\n\n .. 
versionadded:: 0.24\n\nAttributes\n----------\nestimator_ : ``Estimator`` instance\n The fitted estimator used to select features.\n\ngrid_scores_ : ndarray of shape (n_subsets_of_features,)\n The cross-validation scores such that\n ``grid_scores_[i]`` corresponds to\n the CV score of the i-th subset of features.\n\nn_features_ : int\n The number of selected features with cross-validation.\n\nranking_ : narray of shape (n_features,)\n The feature ranking, such that `ranking_[i]`\n corresponds to the ranking\n position of the i-th feature.\n Selected (i.e., estimated best)\n features are assigned rank 1.\n\nsupport_ : ndarray of shape (n_features,)\n The mask of selected features.\n\nNotes\n-----\nThe size of ``grid_scores_`` is equal to\n``ceil((n_features - min_features_to_select) / step) + 1``,\nwhere step is the number of features removed at each iteration.\n\nAllows NaN/Inf in the input if the underlying estimator does as well.\n\nExamples\n--------\nThe following example shows how to retrieve the a-priori not known 5\ninformative features in the Friedman #1 dataset.\n\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.feature_selection import RFECV\n>>> from sklearn.svm import SVR\n>>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)\n>>> estimator = SVR(kernel=\"linear\")\n>>> selector = RFECV(estimator, step=1, cv=5)\n>>> selector = selector.fit(X, y)\n>>> selector.support_\narray([ True, True, True, True, True, False, False, False, False,\n False])\n>>> selector.ranking_\narray([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])\n\nSee Also\n--------\nRFE : Recursive feature elimination.\n\nReferences\n----------\n\n.. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., \"Gene selection\n for cancer classification using support vector machines\",\n Mach. Learn., 46(1-3), 389--422, 2002." 
- } - ], - "functions": [ - { - "name": "_rfe_single_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the score for a fit across one fold." - } - ] - }, - { - "name": "sklearn.feature_selection._sequential", - "imports": [ - "import numbers", - "import numpy as np", - "from _base import SelectorMixin", - "from base import BaseEstimator", - "from base import MetaEstimatorMixin", - "from base import clone", - "from utils._tags import _safe_tags", - "from utils.validation import check_is_fitted", - "from model_selection import cross_val_score" - ], - "classes": [ - { - "name": "SequentialFeatureSelector", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An unfitted estimator." - }, - { - "name": "n_features_to_select", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features to select. If `None`, half of the features are selected. If integer, the parameter is the absolute number of features to select. If float between 0 and 1, it is the fraction of features to select." - }, - { - "name": "direction: {'forward'", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to perform forward selection or backward selection." - }, - { - "name": "'backward'}", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to perform forward selection or backward selection." 
- }, - { - "name": "default='forward'", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to perform forward selection or backward selection." - }, - { - "name": "scoring", - "type": "Union[Callable, str, Dict]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A single str (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. NOTE that when using custom scorers, each scorer should return a single value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. If None, the estimator's score method is used." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. When evaluating a new feature to add or remove, the cross-validation procedure is parallel over the folds. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn the features to select.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors.\ny : array-like of shape (n_samples,)\n Target values.\n\nReturns\n-------\nself : object" - }, - { - "name": "_get_best_new_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transformer that performs Sequential Feature Selection.\n\nThis Sequential Feature Selector adds (forward selection) or\nremoves (backward selection) features to form a feature subset in a\ngreedy fashion. At each stage, this estimator chooses the best feature to\nadd or remove based on the cross-validation score of an estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nestimator : estimator instance\n An unfitted estimator.\n\nn_features_to_select : int or float, default=None\n The number of features to select. If `None`, half of the features are\n selected. If integer, the parameter is the absolute number of features\n to select. 
If float between 0 and 1, it is the fraction of features to\n select.\n\ndirection: {'forward', 'backward'}, default='forward'\n Whether to perform forward selection or backward selection.\n\nscoring : str, callable, list/tuple or dict, default=None\n A single str (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n NOTE that when using custom scorers, each scorer should return a single\n value. Metric functions returning a list/array of values can be wrapped\n into multiple scorers that return one value each.\n\n If None, the estimator's score method is used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. When evaluating a new feature to\n add or remove, the cross-validation procedure is parallel over the\n folds.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nn_features_to_select_ : int\n The number of features that were selected.\n\nsupport_ : ndarray of shape (n_features,), dtype=bool\n The mask of selected features.\n\nSee Also\n--------\nRFE : Recursive feature elimination based on importance weights.\nRFECV : Recursive feature elimination based on importance weights, with\n automatic selection of the number of features.\nSelectFromModel : Feature selection based on thresholds of importance\n weights.\n\nExamples\n--------\n>>> from sklearn.feature_selection import SequentialFeatureSelector\n>>> from sklearn.neighbors import KNeighborsClassifier\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> knn = KNeighborsClassifier(n_neighbors=3)\n>>> sfs = SequentialFeatureSelector(knn, n_features_to_select=3)\n>>> sfs.fit(X, y)\nSequentialFeatureSelector(estimator=KNeighborsClassifier(n_neighbors=3),\n n_features_to_select=3)\n>>> sfs.get_support()\narray([ True, False, True, True])\n>>> sfs.transform(X).shape\n(150, 3)" - } - ], - "functions": [] - }, - { - "name": "sklearn.feature_selection._univariate_selection", - "imports": [ - "import numpy as np", - "import warnings", - "from scipy import special", - "from scipy import stats", - "from scipy.sparse import issparse", - "from base import BaseEstimator", - "from preprocessing import LabelBinarizer", - "from utils import as_float_array", - "from utils import check_array", - "from utils import check_X_y", - "from utils import safe_sqr", - "from utils import safe_mask", - "from utils.extmath import safe_sparse_dot", - "from utils.extmath import row_norms", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from _base import SelectorMixin" - ], - "classes": [ - { - "name": "_BaseFilter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - 
{ - "name": "score_func", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues) or a single array with scores." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels in classification, real numbers in regression)." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Run score function on (X, y) and get the appropriate features.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\nReturns\n-------\nself : object" - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Initialize the univariate feature selection.\n\nParameters\n----------\nscore_func : callable\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues) or a single array with scores." 
- }, - { - "name": "SelectPercentile", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "score_func", - "type": "Callable", - "hasDefault": true, - "default": "f_classif", - "limitation": null, - "ignored": false, - "docstring": "Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues) or a single array with scores. Default is f_classif (see below \"See Also\"). The default function only works with classification tasks. .. versionadded:: 0.18" - }, - { - "name": "percentile", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Percent of features to keep." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Select features according to a percentile of the highest scores.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues) or a single array with scores.\n Default is f_classif (see below \"See Also\"). The default function only\n works with classification tasks.\n\n .. 
versionadded:: 0.18\n\npercentile : int, default=10\n Percent of features to keep.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores, None if `score_func` returned only scores.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.feature_selection import SelectPercentile, chi2\n>>> X, y = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> X_new = SelectPercentile(chi2, percentile=10).fit_transform(X, y)\n>>> X_new.shape\n(1797, 7)\n\nNotes\n-----\nTies between features with equal scores will be broken in an unspecified\nway.\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nmutual_info_classif : Mutual information for a discrete target.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a continuous target.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n mode." - }, - { - "name": "SelectKBest", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "score_func", - "type": "Callable", - "hasDefault": true, - "default": "f_classif", - "limitation": null, - "ignored": false, - "docstring": "Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues) or a single array with scores. Default is f_classif (see below \"See Also\"). The default function only works with classification tasks. .. 
versionadded:: 0.18" - }, - { - "name": "k", - "type": "Union[Literal[\"all\"], int]", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of top features to select. The \"all\" option bypasses selection, for use in a parameter search." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Select features according to the k highest scores.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues) or a single array with scores.\n Default is f_classif (see below \"See Also\"). The default function only\n works with classification tasks.\n\n .. 
versionadded:: 0.18\n\nk : int or \"all\", default=10\n Number of top features to select.\n The \"all\" option bypasses selection, for use in a parameter search.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores, None if `score_func` returned only scores.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.feature_selection import SelectKBest, chi2\n>>> X, y = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> X_new = SelectKBest(chi2, k=20).fit_transform(X, y)\n>>> X_new.shape\n(1797, 20)\n\nNotes\n-----\nTies between features with equal scores will be broken in an unspecified\nway.\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nmutual_info_classif : Mutual information for a discrete target.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a continuous target.\nSelectPercentile : Select features based on percentile of the highest\n scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n mode." - }, - { - "name": "SelectFpr", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "score_func", - "type": "Callable", - "hasDefault": true, - "default": "f_classif", - "limitation": null, - "ignored": false, - "docstring": "Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues). Default is f_classif (see below \"See Also\"). The default function only works with classification tasks." 
- }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "5e-2", - "limitation": null, - "ignored": false, - "docstring": "The highest p-value for features to be kept." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Filter: Select the pvalues below alpha based on a FPR test.\n\nFPR test stands for False Positive Rate test. It controls the total\namount of false detections.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues).\n Default is f_classif (see below \"See Also\"). The default function only\n works with classification tasks.\n\nalpha : float, default=5e-2\n The highest p-value for features to be kept.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.feature_selection import SelectFpr, chi2\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> X.shape\n(569, 30)\n>>> X_new = SelectFpr(chi2, alpha=0.01).fit_transform(X, y)\n>>> X_new.shape\n(569, 16)\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nmutual_info_classif: Mutual information for a discrete target.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a continuous target.\nSelectPercentile : Select features based on percentile of the highest\n scores.\nSelectKBest : Select features based on the k highest scores.\nSelectFdr : Select 
features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n mode." - }, - { - "name": "SelectFdr", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "score_func", - "type": "Callable", - "hasDefault": true, - "default": "f_classif", - "limitation": null, - "ignored": false, - "docstring": "Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues). Default is f_classif (see below \"See Also\"). The default function only works with classification tasks." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "5e-2", - "limitation": null, - "ignored": false, - "docstring": "The highest uncorrected p-value for features to keep." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Filter: Select the p-values for an estimated false discovery rate\n\nThis uses the Benjamini-Hochberg procedure. ``alpha`` is an upper bound\non the expected false discovery rate.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues).\n Default is f_classif (see below \"See Also\"). 
The default function only\n works with classification tasks.\n\nalpha : float, default=5e-2\n The highest uncorrected p-value for features to keep.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.feature_selection import SelectFdr, chi2\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> X.shape\n(569, 30)\n>>> X_new = SelectFdr(chi2, alpha=0.01).fit_transform(X, y)\n>>> X_new.shape\n(569, 16)\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores.\n\nReferences\n----------\nhttps://en.wikipedia.org/wiki/False_discovery_rate\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nmutual_info_classif : Mutual information for a discrete target.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a contnuous target.\nSelectPercentile : Select features based on percentile of the highest\n scores.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFwe : Select features based on family-wise error rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n mode." - }, - { - "name": "SelectFwe", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "score_func", - "type": "Callable", - "hasDefault": true, - "default": "f_classif", - "limitation": null, - "ignored": false, - "docstring": "Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues). Default is f_classif (see below \"See Also\"). The default function only works with classification tasks." 
- }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "5e-2", - "limitation": null, - "ignored": false, - "docstring": "The highest uncorrected p-value for features to keep." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Filter: Select the p-values corresponding to Family-wise error rate\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues).\n Default is f_classif (see below \"See Also\"). The default function only\n works with classification tasks.\n\nalpha : float, default=5e-2\n The highest uncorrected p-value for features to keep.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.feature_selection import SelectFwe, chi2\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> X.shape\n(569, 30)\n>>> X_new = SelectFwe(chi2, alpha=0.01).fit_transform(X, y)\n>>> X_new.shape\n(569, 15)\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores.\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nSelectPercentile : Select features based on percentile of the highest\n scores.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n mode." 
- }, - { - "name": "GenericUnivariateSelect", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "score_func", - "type": "Callable", - "hasDefault": true, - "default": "f_classif", - "limitation": null, - "ignored": false, - "docstring": "Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues). For modes 'percentile' or 'kbest' it can return a single array scores." - }, - { - "name": "mode", - "type": "Literal['percentile', 'k_best', 'fpr', 'fdr', 'fwe']", - "hasDefault": true, - "default": "'percentile'", - "limitation": null, - "ignored": false, - "docstring": "Feature selection mode." - }, - { - "name": "param", - "type": "float", - "hasDefault": true, - "default": "1e-5", - "limitation": null, - "ignored": false, - "docstring": "Parameter of the corresponding mode." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_selector", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Univariate feature selector with configurable strategy.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues). 
For modes 'percentile' or 'kbest' it can return\n a single array scores.\n\nmode : {'percentile', 'k_best', 'fpr', 'fdr', 'fwe'}, default='percentile'\n Feature selection mode.\n\nparam : float or int depending on the feature selection mode, default=1e-5\n Parameter of the corresponding mode.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores, None if `score_func` returned scores only.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.feature_selection import GenericUnivariateSelect, chi2\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> X.shape\n(569, 30)\n>>> transformer = GenericUnivariateSelect(chi2, mode='k_best', param=20)\n>>> X_new = transformer.fit_transform(X, y)\n>>> X_new.shape\n(569, 20)\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nmutual_info_classif : Mutual information for a discrete target.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a continuous target.\nSelectPercentile : Select features based on percentile of the highest\n scores.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate." - } - ], - "functions": [ - { - "name": "_clean_nans", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fixes Issue #1240: NaNs can't be properly compared, so change them to the\nsmallest value of scores's dtype. -inf seems to be unreliable." 
- }, - { - "name": "f_oneway", - "decorators": [], - "parameters": [ - { - "name": "*args", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "sample1, sample2... The sample measurements should be given as arguments." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Performs a 1-way ANOVA.\n\nThe one-way ANOVA tests the null hypothesis that 2 or more groups have\nthe same population mean. The test is applied to samples from two or\nmore groups, possibly with differing sizes.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n*args : array-like, sparse matrices\n sample1, sample2... The sample measurements should be given as\n arguments.\n\nReturns\n-------\nF-value : float\n The computed F-value of the test.\np-value : float\n The associated p-value from the F-distribution.\n\nNotes\n-----\nThe ANOVA test has important assumptions that must be satisfied in order\nfor the associated p-value to be valid.\n\n1. The samples are independent\n2. Each sample is from a normally distributed population\n3. The population standard deviations of the groups are all equal. This\n property is known as homoscedasticity.\n\nIf these assumptions are not true for a given set of data, it may still be\npossible to use the Kruskal-Wallis H-test (`scipy.stats.kruskal`_) although\nwith some loss of power.\n\nThe algorithm is from Heiman[2], pp.394-7.\n\nSee ``scipy.stats.f_oneway`` that should give the same results while\nbeing less efficient.\n\nReferences\n----------\n\n.. [1] Lowry, Richard. \"Concepts and Applications of Inferential\n Statistics\". Chapter 14.\n http://faculty.vassar.edu/lowry/ch14pt1.html\n\n.. [2] Heiman, G.W. Research Methods in Statistics. 2002." 
- }, - { - "name": "f_classif", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of regressors that will be tested sequentially." - }, - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the ANOVA F-value for the provided sample.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} shape = [n_samples, n_features]\n The set of regressors that will be tested sequentially.\n\ny : array of shape(n_samples)\n The data matrix.\n\nReturns\n-------\nF : array, shape = [n_features,]\n The set of F values.\n\npval : array, shape = [n_features,]\n The set of p-values.\n\nSee Also\n--------\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks." - }, - { - "name": "_chisquare", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fast replacement for scipy.stats.chisquare.\n\nVersion from https://github.com/scipy/scipy/pull/2525 with additional\noptimizations." - }, - { - "name": "chi2", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample vectors." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector (class labels)." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute chi-squared stats between each non-negative feature and class.\n\nThis score can be used to select the n_features features with the\nhighest values for the test chi-squared statistic from X, which must\ncontain only non-negative features such as booleans or frequencies\n(e.g., term counts in document classification), relative to the classes.\n\nRecall that the chi-square test measures dependence between stochastic\nvariables, so using this function \"weeds out\" the features that are the\nmost likely to be independent of class and therefore irrelevant for\nclassification.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Sample vectors.\n\ny : array-like of shape (n_samples,)\n Target vector (class labels).\n\nReturns\n-------\nchi2 : array, shape = (n_features,)\n chi2 statistics of each feature.\npval : array, shape = (n_features,)\n p-values of each feature.\n\nNotes\n-----\nComplexity of this algorithm is O(n_classes * n_features).\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nf_regression : F-value between label/feature for regression tasks." - }, - { - "name": "f_regression", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of regressors that will be tested sequentially." - }, - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix" - }, - { - "name": "center", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If true, X and y will be centered." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Univariate linear regression tests.\n\nLinear model for testing the individual effect of each of many regressors.\nThis is a scoring function to be used in a feature selection procedure, not\na free standing feature selection procedure.\n\nThis is done in 2 steps:\n\n1. The correlation between each regressor and the target is computed,\n that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) *\n std(y)).\n2. It is converted to an F score then to a p-value.\n\nFor more on usage see the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} shape = (n_samples, n_features)\n The set of regressors that will be tested sequentially.\n\ny : array of shape(n_samples).\n The data matrix\n\ncenter : bool, default=True\n If true, X and y will be centered.\n\nReturns\n-------\nF : array, shape=(n_features,)\n F values of features.\n\npval : array, shape=(n_features,)\n p-values of F-scores.\n\nSee Also\n--------\nmutual_info_regression : Mutual information for a continuous target.\nf_classif : ANOVA F-value between label/feature for classification tasks.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.\nSelectPercentile : Select features based on percentile of the highest\n scores." 
- } - ] - }, - { - "name": "sklearn.feature_selection._variance_threshold", - "imports": [ - "import numpy as np", - "from base import BaseEstimator", - "from _base import SelectorMixin", - "from utils.sparsefuncs import mean_variance_axis", - "from utils.sparsefuncs import min_max_axis", - "from utils.validation import check_is_fitted" - ], - "classes": [ - { - "name": "VarianceThreshold", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "threshold", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Features with a training-set variance lower than this threshold will be removed. The default is to keep all features with non-zero variance, i.e. remove the features that have the same value in all samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample vectors from which to compute variances." - }, - { - "name": "y", - "type": "Any", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored. This parameter exists only for compatibility with sklearn.pipeline.Pipeline." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Learn empirical variances from X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Sample vectors from which to compute variances.\n\ny : any, default=None\n Ignored. 
This parameter exists only for compatibility with\n sklearn.pipeline.Pipeline.\n\nReturns\n-------\nself" - }, - { - "name": "_get_support_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Feature selector that removes all low-variance features.\n\nThis feature selection algorithm looks only at the features (X), not the\ndesired outputs (y), and can thus be used for unsupervised learning.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nthreshold : float, default=0\n Features with a training-set variance lower than this threshold will\n be removed. The default is to keep all features with non-zero variance,\n i.e. remove the features that have the same value in all samples.\n\nAttributes\n----------\nvariances_ : array, shape (n_features,)\n Variances of individual features.\n\nNotes\n-----\nAllows NaN in the input.\nRaises ValueError if no feature in X meets the variance threshold.\n\nExamples\n--------\nThe following dataset has integer features, two of which are the same\nin every sample. 
These are removed with the default setting for threshold::\n\n >>> X = [[0, 2, 0, 3], [0, 1, 4, 3], [0, 1, 1, 3]]\n >>> selector = VarianceThreshold()\n >>> selector.fit_transform(X)\n array([[2, 0],\n [1, 4],\n [1, 1]])" - } - ], - "functions": [] - }, - { - "name": "sklearn.feature_selection", - "imports": [ - "from _univariate_selection import chi2", - "from _univariate_selection import f_classif", - "from _univariate_selection import f_oneway", - "from _univariate_selection import f_regression", - "from _univariate_selection import SelectPercentile", - "from _univariate_selection import SelectKBest", - "from _univariate_selection import SelectFpr", - "from _univariate_selection import SelectFdr", - "from _univariate_selection import SelectFwe", - "from _univariate_selection import GenericUnivariateSelect", - "from _variance_threshold import VarianceThreshold", - "from _rfe import RFE", - "from _rfe import RFECV", - "from _from_model import SelectFromModel", - "from _sequential import SequentialFeatureSelector", - "from _mutual_info import mutual_info_regression", - "from _mutual_info import mutual_info_classif", - "from _base import SelectorMixin" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.feature_selection.tests.test_base", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy import sparse as sp", - "from numpy.testing import assert_array_equal", - "from sklearn.base import BaseEstimator", - "from sklearn.feature_selection._base import SelectorMixin", - "from sklearn.utils import check_array" - ], - "classes": [ - { - "name": "StepSelector", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_support_mask", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Retain every `step` features (beginning with 0)" - } - ], - "functions": [ - { - "name": "test_transform_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inverse_transform_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inverse_transform_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_selection.tests.test_chi2", - "imports": [ - "import warnings", - "import numpy as np", - "import pytest", - "from scipy.sparse import coo_matrix", - "from scipy.sparse import csr_matrix", - "import scipy.stats", - "from sklearn.feature_selection import SelectKBest", - "from sklearn.feature_selection import chi2", - "from sklearn.feature_selection._univariate_selection import _chisquare", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal" - ], - "classes": [], - "functions": [ - { - "name": "mkchi2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make k-best chi2 selector" - }, - { - "name": "test_chi2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_chi2_coo", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_chi2_negative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_chi2_unused_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_chisquare", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_selection.tests.test_feature_select", - "imports": [ - "import itertools", - "import warnings", - "import numpy as np", - "from scipy import stats", - "from scipy import sparse", - "import pytest", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils import safe_mask", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.feature_selection import chi2", - "from sklearn.feature_selection import f_classif", - "from sklearn.feature_selection import f_oneway", - "from sklearn.feature_selection import f_regression", - "from sklearn.feature_selection import mutual_info_classif", - "from sklearn.feature_selection import mutual_info_regression", - "from sklearn.feature_selection import SelectPercentile", - "from sklearn.feature_selection import SelectKBest", - "from sklearn.feature_selection import SelectFpr", - "from sklearn.feature_selection import SelectFdr", - "from sklearn.feature_selection import SelectFwe", - "from sklearn.feature_selection import GenericUnivariateSelect" - ], - "classes": [], - "functions": [ - { - "name": "test_f_oneway_vs_scipy_stats", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_f_oneway_ints", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_f_classif", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_f_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_f_regression_input_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_f_regression_center", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_f_classif_multi_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_percentile_classif", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_percentile_classif_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_kbest_classif", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_kbest_all", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_kbest_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_heuristics_classif", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_best_scores_kept", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_percentile_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_percentile_regression_full", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_percentile", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_kbest_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_heuristics_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_boundary_case_ch2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_fdr_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_select_fwe_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_selectkbest_tiebreaking", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_selectpercentile_tiebreaking", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tied_pvalues", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scorefunc_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "test_tied_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nans", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_func_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_k", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_f_classif_constant_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_feature_selected", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mutual_info_classif", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mutual_info_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_selection.tests.test_from_model", - "imports": [ - "import pytest", - "import numpy as np", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import skip_if_32bit", - "from sklearn import datasets", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import SGDClassifier", - "from sklearn.linear_model import Lasso", - "from sklearn.svm import LinearSVC", - "from sklearn.feature_selection import SelectFromModel", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from sklearn.ensemble 
import RandomForestClassifier", - "from sklearn.ensemble import HistGradientBoostingClassifier", - "from sklearn.linear_model import PassiveAggressiveClassifier", - "from sklearn.base import BaseEstimator", - "from sklearn.pipeline import make_pipeline", - "from sklearn.decomposition import PCA" - ], - "classes": [ - { - "name": "NaNTag", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoNaNTag", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NaNTagRandomForest", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "FixedImportanceEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_invalid_input", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_estimator_unchanged", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_features_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_features_dim", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_features_tiebreak", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_threshold_and_max_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_coef_default_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_2d_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calling_fit_reinitializes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prefit", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_threshold_string", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_threshold_without_refitting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_accepts_nan_inf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_accepts_nan_inf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_allow_nan_tag_comes_from_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pca_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_importance_getter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_selection.tests.test_mutual_info", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy.sparse import csr_matrix", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.feature_selection._mutual_info import _compute_mi", - "from sklearn.feature_selection import mutual_info_regression", - "from sklearn.feature_selection import mutual_info_classif" - ], - "classes": [], - "functions": [ - { - "name": "test_compute_mi_dd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_mi_cc", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_mi_cd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_mi_cd_unique_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mutual_info_classif_discrete", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mutual_info_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mutual_info_classif_mixed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mutual_info_options", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_selection.tests.test_rfe", - "imports": [ - "from operator import attrgetter", - "import pytest", - "import numpy as np", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal", - "from scipy import sparse", - "from sklearn.feature_selection import RFE", - "from sklearn.feature_selection import RFECV", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_friedman1", - "from sklearn.metrics import zero_one_loss", - "from sklearn.svm import SVC", - "from sklearn.svm import SVR", - "from sklearn.svm import LinearSVR", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.model_selection import cross_val_score", - "from sklearn.model_selection import GroupKFold", - "from sklearn.compose import TransformedTargetRegressor", - "from sklearn.pipeline import 
make_pipeline", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.metrics import make_scorer", - "from sklearn.metrics import get_scorer", - "from io import StringIO", - "import sys" - ], - "classes": [ - { - "name": "MockClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier to test recursive feature elimination" - } - ], - "functions": [ - { - "name": "test_rfe_features_importance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe_invalid_n_features_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_rfe_percent_n_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe_mockclassifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfecv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfecv_mockclassifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfecv_verbose_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfecv_grid_scores_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe_estimator_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe_min_step", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_number_of_subsets_of_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe_cv_n_jobs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe_cv_groups", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe_wrapped_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rfe_importance_getter_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_rfe_allow_nan_inf_in_x", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_w_pipeline_2d_coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_selection.tests.test_sequential", - "imports": [ - "import pytest", - "import scipy", - "import numpy as np", - "from numpy.testing import assert_array_equal", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.pipeline import make_pipeline", - "from sklearn.feature_selection import SequentialFeatureSelector", - "from sklearn.datasets import make_regression", - "from sklearn.linear_model import LinearRegression", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from sklearn.ensemble import HistGradientBoostingRegressor" - ], - "classes": [], - "functions": [ - { - "name": "test_bad_n_features_to_select", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bad_direction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_to_select", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_to_select_float", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sanity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_support", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nan_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_selection.tests.test_variance_threshold", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.utils._testing import assert_array_equal", - "from scipy.sparse import bsr_matrix", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from sklearn.feature_selection import VarianceThreshold" - ], - "classes": [], - "functions": [ - { - "name": "test_zero_variance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_variance_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_variance_floating_point_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_variance_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.feature_selection.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.gaussian_process.kernels", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "from collections import namedtuple", - "import math", - "from inspect import signature", - "import numpy as np", - "from scipy.special import kv", - "from scipy.special import gamma", - "from scipy.spatial.distance import pdist", - "from scipy.spatial.distance import cdist", - "from scipy.spatial.distance import squareform", - "from 
metrics.pairwise import pairwise_kernels", - "from base import clone", - "from utils.validation import _num_samples", - "import warnings", - "from sklearn.exceptions import ConvergenceWarning" - ], - "classes": [ - { - "name": "Hyperparameter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__new__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__eq__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A kernel hyperparameter's specification in form of a namedtuple.\n\n.. versionadded:: 0.18\n\nAttributes\n----------\nname : str\n The name of the hyperparameter. Note that a kernel using a\n hyperparameter with name \"x\" must have the attributes self.x and\n self.x_bounds\n\nvalue_type : str\n The type of the hyperparameter. Currently, only \"numeric\"\n hyperparameters are supported.\n\nbounds : pair of floats >= 0 or \"fixed\"\n The lower and upper bound on the parameter. If n_elements>1, a pair\n of 1d array with n_elements each may be given alternatively. If\n the string \"fixed\" is passed as bounds, the hyperparameter's value\n cannot be changed.\n\nn_elements : int, default=1\n The number of elements of the hyperparameter value. Defaults to 1,\n which corresponds to a scalar hyperparameter. n_elements > 1\n corresponds to a hyperparameter which is vector-valued,\n such as, e.g., anisotropic length-scales.\n\nfixed : bool, default=None\n Whether the value of this hyperparameter is fixed, i.e., cannot be\n changed during hyperparameter tuning. 
If None is passed, the \"fixed\" is\n derived based on the given bounds.\n\nExamples\n--------\n>>> from sklearn.gaussian_process.kernels import ConstantKernel\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import Hyperparameter\n>>> X, y = make_friedman2(n_samples=50, noise=0, random_state=0)\n>>> kernel = ConstantKernel(constant_value=1.0,\n... constant_value_bounds=(0.0, 10.0))\n\nWe can access each hyperparameter:\n\n>>> for hyperparameter in kernel.hyperparameters:\n... print(hyperparameter)\nHyperparameter(name='constant_value', value_type='numeric',\nbounds=array([[ 0., 10.]]), n_elements=1, fixed=False)\n\n>>> params = kernel.get_params()\n>>> for key in sorted(params): print(f\"{key} : {params[key]}\")\nconstant_value : 1.0\nconstant_value_bounds : (0.0, 10.0)" - }, - { - "name": "Kernel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get parameters of this kernel.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values." 
- }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Set the parameters of this kernel.\n\nThe method works on simple kernels as well as on nested kernels.\nThe latter have parameters of the form ``__``\nso that it's possible to update each component of a nested object.\n\nReturns\n-------\nself" - }, - { - "name": "clone_with_theta", - "decorators": [], - "parameters": [ - { - "name": "theta", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The hyperparameters" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns a clone of self with given hyperparameters theta.\n\nParameters\n----------\ntheta : ndarray of shape (n_dims,)\n The hyperparameters" - }, - { - "name": "n_dims", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of non-fixed hyperparameters of the kernel." - }, - { - "name": "hyperparameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns a list of all hyperparameter specifications." 
- }, - { - "name": "theta", - "decorators": [], - "parameters": [ - { - "name": "theta", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The non-fixed, log-transformed hyperparameters of the kernel" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\nParameters\n----------\ntheta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel" - }, - { - "name": "bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the log-transformed bounds on the theta.\n\nReturns\n-------\nbounds : ndarray of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta" - }, - { - "name": "__add__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__radd__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__mul__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__rmul__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__pow__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__eq__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate the kernel." 
- }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples,)\n Left argument of the returned kernel k(X, Y)\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)" - }, - { - "name": "is_stationary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is stationary. " - }, - { - "name": "requires_vector_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is defined on fixed-length feature\nvectors or generic objects. Defaults to True for backward\ncompatibility." - }, - { - "name": "_check_bounds_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Called after fitting to warn if bounds may have been too tight." - } - ], - "docstring": "Base class for all kernels.\n\n.. 
versionadded:: 0.18" - }, - { - "name": "NormalizedKernelMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)" - } - ], - "docstring": "Mixin for kernels which are normalized: k(X, X)=1.\n\n.. versionadded:: 0.18" - }, - { - "name": "StationaryKernelMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "is_stationary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is stationary. " - } - ], - "docstring": "Mixin for kernels which are stationary: k(X, Y)= f(X-Y).\n\n.. 
versionadded:: 0.18" - }, - { - "name": "GenericKernelMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "requires_vector_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Whether the kernel works only on fixed-length feature vectors." - } - ], - "docstring": "Mixin for kernels which operate on generic objects such as variable-\nlength sequences, trees, and graphs.\n\n.. versionadded:: 0.22" - }, - { - "name": "CompoundKernel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernels", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The other kernels" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get parameters of this kernel.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values." 
- }, - { - "name": "theta", - "decorators": [], - "parameters": [ - { - "name": "theta", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The non-fixed, log-transformed hyperparameters of the kernel" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\nParameters\n----------\ntheta : array of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel" - }, - { - "name": "bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the log-transformed bounds on the theta.\n\nReturns\n-------\nbounds : array of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta" - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) is evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nNote that this compound kernel returns the results of all simple kernel\nstacked along an additional axis.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object, default=None\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_X, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of the\n kernel hyperparameter is computed.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y, n_kernels)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims, n_kernels), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True." - }, - { - "name": "__eq__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "is_stationary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is stationary. " - }, - { - "name": "requires_vector_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is defined on discrete structures. " - }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to the kernel." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to `np.diag(self(X))`; however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X, n_kernels)\n Diagonal of kernel k(X, X)" - } - ], - "docstring": "Kernel which is composed of a set of other kernels.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernels : list of Kernels\n The other kernels\n\nExamples\n--------\n>>> from sklearn.gaussian_process.kernels import WhiteKernel\n>>> from sklearn.gaussian_process.kernels import RBF\n>>> from sklearn.gaussian_process.kernels import CompoundKernel\n>>> kernel = CompoundKernel(\n... [WhiteKernel(noise_level=3.0), RBF(length_scale=2.0)])\n>>> print(kernel.bounds)\n[[-11.51292546 11.51292546]\n [-11.51292546 11.51292546]]\n>>> print(kernel.n_dims)\n2\n>>> print(kernel.theta)\n[1.09861229 0.69314718]" - }, - { - "name": "KernelOperator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get parameters of this kernel.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values." - }, - { - "name": "hyperparameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns a list of all hyperparameter." - }, - { - "name": "theta", - "decorators": [], - "parameters": [ - { - "name": "theta", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The non-fixed, log-transformed hyperparameters of the kernel" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\nParameters\n----------\ntheta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel" - }, - { - "name": "bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the log-transformed bounds on the theta.\n\nReturns\n-------\nbounds : ndarray of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta" - }, - { - "name": "__eq__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "is_stationary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is stationary. " - }, - { - "name": "requires_vector_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is stationary. " - } - ], - "docstring": "Base class for all kernel operators.\n\n.. 
versionadded:: 0.18" - }, - { - "name": "Sum", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "k1", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The first base-kernel of the sum-kernel" - }, - { - "name": "k2", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The second base-kernel of the sum-kernel" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) is evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_X, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). 
If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True." - }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to the kernel." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to `np.diag(self(X))`; however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "The `Sum` kernel takes two kernels :math:`k_1` and :math:`k_2`\nand combines them via\n\n.. math::\n k_{sum}(X, Y) = k_1(X, Y) + k_2(X, Y)\n\nNote that the `__add__` magic method is overridden, so\n`Sum(RBF(), RBF())` is equivalent to using the + operator\nwith `RBF() + RBF()`.\n\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18\n\nParameters\n----------\nk1 : Kernel\n The first base-kernel of the sum-kernel\n\nk2 : Kernel\n The second base-kernel of the sum-kernel\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import RBF, Sum, ConstantKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = Sum(ConstantKernel(2), RBF())\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n1.0\n>>> kernel\n1.41**2 + RBF(length_scale=1)" - }, - { - "name": "Product", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "k1", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The first base-kernel of the product-kernel" - }, - { - "name": "k2", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The second base-kernel of the product-kernel" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) is evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_Y, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True." - }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to the kernel." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "The `Product` kernel takes two kernels :math:`k_1` and :math:`k_2`\nand combines them via\n\n.. 
math::\n k_{prod}(X, Y) = k_1(X, Y) * k_2(X, Y)\n\nNote that the `__mul__` magic method is overridden, so\n`Product(RBF(), RBF())` is equivalent to using the * operator\nwith `RBF() * RBF()`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nk1 : Kernel\n The first base-kernel of the product-kernel\n\nk2 : Kernel\n The second base-kernel of the product-kernel\n\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import (RBF, Product,\n... ConstantKernel)\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = Product(ConstantKernel(2), RBF())\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n1.0\n>>> kernel\n1.41**2 * RBF(length_scale=1)" - }, - { - "name": "Exponentiation", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The base kernel" - }, - { - "name": "exponent", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The exponent for the base kernel" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [ - { - "name": "deep", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get parameters of this kernel.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values." - }, - { - "name": "hyperparameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns a list of all hyperparameter." - }, - { - "name": "theta", - "decorators": [], - "parameters": [ - { - "name": "theta", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The non-fixed, log-transformed hyperparameters of the kernel" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\nParameters\n----------\ntheta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel" - }, - { - "name": "bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the log-transformed bounds on the theta.\n\nReturns\n-------\nbounds : ndarray of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta" - }, - { - "name": "__eq__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). 
If None, k(X, X) is evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_Y, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True." - }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to the kernel." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "is_stationary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is stationary. " - }, - { - "name": "requires_vector_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is defined on discrete structures. " - } - ], - "docstring": "The Exponentiation kernel takes one base kernel and a scalar parameter\n:math:`p` and combines them via\n\n.. math::\n k_{exp}(X, Y) = k(X, Y) ^p\n\nNote that the `__pow__` magic method is overridden, so\n`Exponentiation(RBF(), 2)` is equivalent to using the ** operator\nwith `RBF() ** 2`.\n\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernel : Kernel\n The base kernel\n\nexponent : float\n The exponent for the base kernel\n\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import (RationalQuadratic,\n... Exponentiation)\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = Exponentiation(RationalQuadratic(), exponent=2)\n>>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,\n... 
random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.419...\n>>> gpr.predict(X[:1,:], return_std=True)\n(array([635.5...]), array([0.559...]))" - }, - { - "name": "ConstantKernel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "constant_value", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The constant value which defines the covariance: k(x_1, x_2) = constant_value" - }, - { - "name": "constant_value_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on `constant_value`. If set to \"fixed\", `constant_value` cannot be changed during hyperparameter tuning." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_constant_value", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) is evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed. Only supported when Y is None." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_X, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when eval_gradient\n is True." - }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to the kernel." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Constant kernel.\n\nCan be used as part of a product-kernel where it scales the magnitude of\nthe other factor (kernel) or as part of a sum-kernel, where it modifies\nthe mean of the Gaussian process.\n\n.. math::\n k(x_1, x_2) = constant\\_value \\;\\forall\\; x_1, x_2\n\nAdding a constant kernel is equivalent to adding a constant::\n\n kernel = RBF() + ConstantKernel(constant_value=2)\n\nis the same as::\n\n kernel = RBF() + 2\n\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nconstant_value : float, default=1.0\n The constant value which defines the covariance:\n k(x_1, x_2) = constant_value\n\nconstant_value_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on `constant_value`.\n If set to \"fixed\", `constant_value` cannot be changed during\n hyperparameter tuning.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import RBF, ConstantKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = RBF() + ConstantKernel(constant_value=2)\n>>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,\n... 
random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3696...\n>>> gpr.predict(X[:1,:], return_std=True)\n(array([606.1...]), array([0.24...]))" - }, - { - "name": "WhiteKernel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "noise_level", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Parameter controlling the noise level (variance)" - }, - { - "name": "noise_level_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'noise_level'. If set to \"fixed\", 'noise_level' cannot be changed during hyperparameter tuning." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_noise_level", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) is evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed. Only supported when Y is None." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_X, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when eval_gradient\n is True." - }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to the kernel." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "White kernel.\n\nThe main use-case of this kernel is as part of a sum-kernel where it\nexplains the noise of the signal as independently and identically\nnormally-distributed. 
The parameter noise_level equals the variance of this\nnoise.\n\n.. math::\n k(x_1, x_2) = noise\\_level \\text{ if } x_i == x_j \\text{ else } 0\n\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nnoise_level : float, default=1.0\n Parameter controlling the noise level (variance)\n\nnoise_level_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'noise_level'.\n If set to \"fixed\", 'noise_level' cannot be changed during\n hyperparameter tuning.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = DotProduct() + WhiteKernel(noise_level=0.5)\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3680...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([653.0..., 592.1... ]), array([316.6..., 316.6...]))" - }, - { - "name": "RBF", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "length_scale", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The length scale of the kernel. If a float, an isotropic kernel is used. If an array, an anisotropic kernel is used where each dimension of l defines the length-scale of the respective feature dimension." - }, - { - "name": "length_scale_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'length_scale'. If set to \"fixed\", 'length_scale' cannot be changed during hyperparameter tuning." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "anisotropic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_length_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) if evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed. Only supported when Y is None." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. 
Only returned when `eval_gradient`\n is True." - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Radial-basis function kernel (aka squared-exponential kernel).\n\nThe RBF kernel is a stationary kernel. It is also known as the\n\"squared exponential\" kernel. It is parameterized by a length scale\nparameter :math:`l>0`, which can either be a scalar (isotropic variant\nof the kernel) or a vector with the same number of dimensions as the inputs\nX (anisotropic variant of the kernel). The kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\exp\\left(- \\frac{d(x_i, x_j)^2}{2l^2} \\right)\n\nwhere :math:`l` is the length scale of the kernel and\n:math:`d(\\cdot,\\cdot)` is the Euclidean distance.\nFor advice on how to set the length scale parameter, see e.g. [1]_.\n\nThis kernel is infinitely differentiable, which implies that GPs with this\nkernel as covariance function have mean square derivatives of all orders,\nand are thus very smooth.\nSee [2]_, Chapter 4, Section 4.2, for further details of the RBF kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nlength_scale : float or ndarray of shape (n_features,), default=1.0\n The length scale of the kernel. If a float, an isotropic kernel is\n used. If an array, an anisotropic kernel is used where each dimension\n of l defines the length-scale of the respective feature dimension.\n\nlength_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\nReferences\n----------\n.. [1] `David Duvenaud (2014). \"The Kernel Cookbook:\n Advice on Covariance functions\".\n `_\n\n.. [2] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n \"Gaussian Processes for Machine Learning\". 
The MIT Press.\n `_\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import RBF\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = 1.0 * RBF(1.0)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9866...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.8354..., 0.03228..., 0.1322...],\n [0.7906..., 0.0652..., 0.1441...]])" - }, - { - "name": "Matern", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "length_scale", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The length scale of the kernel. If a float, an isotropic kernel is used. If an array, an anisotropic kernel is used where each dimension of l defines the length-scale of the respective feature dimension." - }, - { - "name": "length_scale_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'length_scale'. If set to \"fixed\", 'length_scale' cannot be changed during hyperparameter tuning." - }, - { - "name": "nu", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The parameter nu controlling the smoothness of the learned function. The smaller nu, the less smooth the approximated function is. For nu=inf, the kernel becomes equivalent to the RBF kernel and for nu=0.5 to the absolute exponential kernel. Important intermediate values are nu=1.5 (once differentiable functions) and nu=2.5 (twice differentiable functions). Note that values of nu not in [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost (appr. 
10 times higher) since they require to evaluate the modified Bessel function. Furthermore, in contrast to l, nu is kept fixed to its initial value and not optimized." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) if evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed. Only supported when Y is None." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True." 
- }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Matern kernel.\n\nThe class of Matern kernels is a generalization of the :class:`RBF`.\nIt has an additional parameter :math:`\\nu` which controls the\nsmoothness of the resulting function. The smaller :math:`\\nu`,\nthe less smooth the approximated function is.\nAs :math:`\\nu\\rightarrow\\infty`, the kernel becomes equivalent to\nthe :class:`RBF` kernel. When :math:`\\nu = 1/2`, the Mat\u00e9rn kernel\nbecomes identical to the absolute exponential kernel.\nImportant intermediate values are\n:math:`\\nu=1.5` (once differentiable functions)\nand :math:`\\nu=2.5` (twice differentiable functions).\n\nThe kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\frac{1}{\\Gamma(\\nu)2^{\\nu-1}}\\Bigg(\n \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\n \\Bigg)^\\nu K_\\nu\\Bigg(\n \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\\Bigg)\n\n\n\nwhere :math:`d(\\cdot,\\cdot)` is the Euclidean distance,\n:math:`K_{\\nu}(\\cdot)` is a modified Bessel function and\n:math:`\\Gamma(\\cdot)` is the gamma function.\nSee [1]_, Chapter 4, Section 4.2, for details regarding the different\nvariants of the Matern kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nlength_scale : float or ndarray of shape (n_features,), default=1.0\n The length scale of the kernel. If a float, an isotropic kernel is\n used. 
If an array, an anisotropic kernel is used where each dimension\n of l defines the length-scale of the respective feature dimension.\n\nlength_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\nnu : float, default=1.5\n The parameter nu controlling the smoothness of the learned function.\n The smaller nu, the less smooth the approximated function is.\n For nu=inf, the kernel becomes equivalent to the RBF kernel and for\n nu=0.5 to the absolute exponential kernel. Important intermediate\n values are nu=1.5 (once differentiable functions) and nu=2.5\n (twice differentiable functions). Note that values of nu not in\n [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost\n (appr. 10 times higher) since they require to evaluate the modified\n Bessel function. Furthermore, in contrast to l, nu is kept fixed to\n its initial value and not optimized.\n\nReferences\n----------\n.. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n \"Gaussian Processes for Machine Learning\". The MIT Press.\n `_\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import Matern\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = 1.0 * Matern(length_scale=1.0, nu=1.5)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... 
random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9866...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.8513..., 0.0368..., 0.1117...],\n [0.8086..., 0.0693..., 0.1220...]])" - }, - { - "name": "RationalQuadratic", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "length_scale", - "type": null, - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The length scale of the kernel." - }, - { - "name": "alpha", - "type": null, - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Scale mixture parameter" - }, - { - "name": "length_scale_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'length_scale'. If set to \"fixed\", 'length_scale' cannot be changed during hyperparameter tuning." - }, - { - "name": "alpha_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'alpha'. If set to \"fixed\", 'alpha' cannot be changed during hyperparameter tuning." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_length_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_alpha", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) if evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed. Only supported when Y is None." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims)\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. 
Only returned when eval_gradient\n is True." - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Rational Quadratic kernel.\n\nThe RationalQuadratic kernel can be seen as a scale mixture (an infinite\nsum) of RBF kernels with different characteristic length scales. It is\nparameterized by a length scale parameter :math:`l>0` and a scale\nmixture parameter :math:`\\alpha>0`. Only the isotropic variant\nwhere length_scale :math:`l` is a scalar is supported at the moment.\nThe kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\left(\n 1 + \\frac{d(x_i, x_j)^2 }{ 2\\alpha l^2}\\right)^{-\\alpha}\n\nwhere :math:`\\alpha` is the scale mixture parameter, :math:`l` is\nthe length scale of the kernel and :math:`d(\\cdot,\\cdot)` is the\nEuclidean distance.\nFor advice on how to set the parameters, see e.g. [1]_.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nlength_scale : float > 0, default=1.0\n The length scale of the kernel.\n\nalpha : float > 0, default=1.0\n Scale mixture parameter\n\nlength_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\nalpha_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'alpha'.\n If set to \"fixed\", 'alpha' cannot be changed during\n hyperparameter tuning.\n\nReferences\n----------\n.. [1] `David Duvenaud (2014). 
\"The Kernel Cookbook:\n Advice on Covariance functions\".\n `_\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import Matern\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = RationalQuadratic(length_scale=1.0, alpha=1.5)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9733...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.8881..., 0.0566..., 0.05518...],\n [0.8678..., 0.0707... , 0.0614...]])" - }, - { - "name": "ExpSineSquared", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "length_scale", - "type": null, - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The length scale of the kernel." - }, - { - "name": "periodicity", - "type": null, - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The periodicity of the kernel." - }, - { - "name": "length_scale_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'length_scale'. If set to \"fixed\", 'length_scale' cannot be changed during hyperparameter tuning." - }, - { - "name": "periodicity_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'periodicity'. If set to \"fixed\", 'periodicity' cannot be changed during hyperparameter tuning." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_length_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the length scale" - }, - { - "name": "hyperparameter_periodicity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) if evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed. Only supported when Y is None." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). 
If None, k(X, X)\n if evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True." - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Exp-Sine-Squared kernel (aka periodic kernel).\n\nThe ExpSineSquared kernel allows one to model functions which repeat\nthemselves exactly. It is parameterized by a length scale\nparameter :math:`l>0` and a periodicity parameter :math:`p>0`.\nOnly the isotropic variant where :math:`l` is a scalar is\nsupported at the moment. The kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\text{exp}\\left(-\n \\frac{ 2\\sin^2(\\pi d(x_i, x_j)/p) }{ l^ 2} \\right)\n\nwhere :math:`l` is the length scale of the kernel, :math:`p` the\nperiodicity of the kernel and :math:`d(\\\\cdot,\\\\cdot)` is the\nEuclidean distance.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18\n\nParameters\n----------\n\nlength_scale : float > 0, default=1.0\n The length scale of the kernel.\n\nperiodicity : float > 0, default=1.0\n The periodicity of the kernel.\n\nlength_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\nperiodicity_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'periodicity'.\n If set to \"fixed\", 'periodicity' cannot be changed during\n hyperparameter tuning.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import ExpSineSquared\n>>> X, y = make_friedman2(n_samples=50, noise=0, random_state=0)\n>>> kernel = ExpSineSquared(length_scale=1, periodicity=1)\n>>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.0144...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([425.6..., 457.5...]), array([0.3894..., 0.3467...]))" - }, - { - "name": "DotProduct", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "sigma_0", - "type": null, - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Parameter controlling the inhomogenity of the kernel. If sigma_0=0, the kernel is homogenous." - }, - { - "name": "sigma_0_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'sigma_0'. If set to \"fixed\", 'sigma_0' cannot be changed during hyperparameter tuning." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_sigma_0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) if evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed. Only supported when Y is None." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True." 
- }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y).\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)." - }, - { - "name": "is_stationary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is stationary. " - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dot-Product kernel.\n\nThe DotProduct kernel is non-stationary and can be obtained from linear\nregression by putting :math:`N(0, 1)` priors on the coefficients\nof :math:`x_d (d = 1, . . . , D)` and a prior of :math:`N(0, \\sigma_0^2)`\non the bias. The DotProduct kernel is invariant to a rotation of\nthe coordinates about the origin, but not translations.\nIt is parameterized by a parameter sigma_0 :math:`\\sigma`\nwhich controls the inhomogenity of the kernel. For :math:`\\sigma_0^2 =0`,\nthe kernel is called the homogeneous linear kernel, otherwise\nit is inhomogeneous. The kernel is given by\n\n.. math::\n k(x_i, x_j) = \\sigma_0 ^ 2 + x_i \\cdot x_j\n\nThe DotProduct kernel is commonly combined with exponentiation.\n\nSee [1]_, Chapter 4, Section 4.2, for further details regarding the\nDotProduct kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18\n\nParameters\n----------\nsigma_0 : float >= 0, default=1.0\n Parameter controlling the inhomogenity of the kernel. If sigma_0=0,\n the kernel is homogenous.\n\nsigma_0_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'sigma_0'.\n If set to \"fixed\", 'sigma_0' cannot be changed during\n hyperparameter tuning.\n\nReferences\n----------\n.. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n \"Gaussian Processes for Machine Learning\". The MIT Press.\n `_\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = DotProduct() + WhiteKernel()\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3680...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([653.0..., 592.1...]), array([316.6..., 316.6...]))" - }, - { - "name": "PairwiseKernel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Parameter gamma of the pairwise kernel specified by metric. It should be positive." - }, - { - "name": "gamma_bounds", - "type": "Literal[\"fixed\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper bound on 'gamma'. If set to \"fixed\", 'gamma' cannot be changed during hyperparameter tuning." 
- }, - { - "name": "metric", - "type": "Literal[\"linear\", \"additive_chi2\", \"chi2\", \"poly\", \"polynomial\", \"rbf\", \"laplacian\", \"sigmoid\", \"cosine\"]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating kernel between instances in a feature array. If metric is a string, it must be one of the metrics in pairwise.PAIRWISE_KERNEL_FUNCTIONS. If metric is \"precomputed\", X is assumed to be a kernel matrix. Alternatively, if metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays from X as input and return a value indicating the distance between them." - }, - { - "name": "pairwise_kernels_kwargs", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "All entries of this dict (if any) are passed as keyword arguments to the pairwise kernel function." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_gamma", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Right argument of the returned kernel k(X, Y). If None, k(X, X) if evaluated instead." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Determines whether the gradient with respect to the log of the kernel hyperparameter is computed. 
Only supported when Y is None." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True." - }, - { - "name": "diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Left argument of the returned kernel k(X, Y)" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)" - }, - { - "name": "is_stationary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the kernel is stationary. 
" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Wrapper for kernels in sklearn.metrics.pairwise.\n\nA thin wrapper around the functionality of the kernels in\nsklearn.metrics.pairwise.\n\nNote: Evaluation of eval_gradient is not analytic but numeric and all\n kernels support only isotropic distances. The parameter gamma is\n considered to be a hyperparameter and may be optimized. The other\n kernel parameters are set directly at initialization and are kept\n fixed.\n\n.. versionadded:: 0.18\n\nParameters\n----------\ngamma : float, default=1.0\n Parameter gamma of the pairwise kernel specified by metric. It should\n be positive.\n\ngamma_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'gamma'.\n If set to \"fixed\", 'gamma' cannot be changed during\n hyperparameter tuning.\n\nmetric : {\"linear\", \"additive_chi2\", \"chi2\", \"poly\", \"polynomial\", \"rbf\", \"laplacian\", \"sigmoid\", \"cosine\"} or callable, default=\"linear\"\n The metric to use when calculating kernel between instances in a\n feature array. If metric is a string, it must be one of the metrics\n in pairwise.PAIRWISE_KERNEL_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a kernel matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. 
The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\npairwise_kernels_kwargs : dict, default=None\n All entries of this dict (if any) are passed as keyword arguments to\n the pairwise kernel function.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import PairwiseKernel\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = PairwiseKernel(metric='rbf')\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9733...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.8880..., 0.05663..., 0.05532...],\n [0.8676..., 0.07073..., 0.06165...]])" - } - ], - "functions": [ - { - "name": "_check_length_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_approx_fprime", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.gaussian_process._gpc", - "imports": [ - "from operator import itemgetter", - "import numpy as np", - "from scipy.linalg import cholesky", - "from scipy.linalg import cho_solve", - "from scipy.linalg import solve", - "import scipy.optimize", - "from scipy.special import erf", - "from scipy.special import expit", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from base import clone", - "from kernels import RBF", - "from kernels import CompoundKernel", - "from kernels import ConstantKernel as C", - "from utils.validation import check_is_fitted", - "from utils.validation import check_array", - "from utils import check_random_state", - "from utils.optimize import _check_optimize_result", - "from preprocessing import LabelEncoder", - "from multiclass import OneVsRestClassifier", - "from multiclass import OneVsOneClassifier", - 
"from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "_BinaryGaussianProcessClassifierLaplace", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The kernel specifying the covariance function of the GP. If None is passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that the kernel's hyperparameters are optimized during fitting." - }, - { - "name": "optimizer", - "type": "Union[Callable, Literal['fmin_l_bfgs_b']]", - "hasDefault": true, - "default": "'fmin_l_bfgs_b'", - "limitation": null, - "ignored": false, - "docstring": "Can either be one of the internally supported optimizers for optimizing the kernel's parameters, specified by a string, or an externally defined optimizer passed as a callable. If a callable is passed, it must have the signature:: def optimizer(obj_func, initial_theta, bounds): # * 'obj_func' is the objective function to be maximized, which # takes the hyperparameters theta as parameter and an # optional flag eval_gradient, which determines if the # gradient is returned additionally to the function value # * 'initial_theta': the initial value for theta, which can be # used by local optimizers # * 'bounds': the bounds on the values of theta .... # Returned are the best found hyperparameters theta and # the corresponding value of the target function. return theta_opt, func_min Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize is used. If None is passed, the kernel's parameters are kept fixed. 
Available internal optimizers are:: 'fmin_l_bfgs_b'" - }, - { - "name": "n_restarts_optimizer", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The number of restarts of the optimizer for finding the kernel's parameters which maximize the log-marginal likelihood. The first run of the optimizer is performed from the kernel's initial parameters, the remaining ones (if any) from thetas sampled log-uniform randomly from the space of allowed theta-values. If greater than 0, all bounds must be finite. Note that n_restarts_optimizer=0 implies that one run is performed." - }, - { - "name": "max_iter_predict", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations in Newton's method for approximating the posterior during predict. Smaller values will reduce computation time at the cost of worse results." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If warm-starts are enabled, the solution of the last Newton iteration on the Laplace approximation of the posterior mode is used as initialization for the next call of _posterior_mode(). This can speed up convergence when _posterior_mode is called several times on similar problems as in hyperparameter optimization. See :term:`the Glossary `." - }, - { - "name": "copy_X_train", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, a persistent copy of the training data is stored in the object. Otherwise, just a reference to the training data is stored, which might cause predictions to change if the data is modified externally." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation used to initialize the centers. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Feature vectors or other representations of training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values, must be binary" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Gaussian process classification model\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data.\n\ny : array-like of shape (n_samples,)\n Target values, must be binary\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Query points where the GP is evaluated for classification." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on an array of test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n Predicted target values for X, values are from ``classes_``" - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Query points where the GP is evaluated for classification." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return probability estimates for the test vector X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\nReturns\n-------\nC : array-like of shape (n_samples, n_classes)\n Returns the probability of the samples for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute ``classes_``." - }, - { - "name": "log_marginal_likelihood", - "decorators": [], - "parameters": [ - { - "name": "theta", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Kernel hyperparameters for which the log-marginal likelihood is evaluated. If None, the precomputed log_marginal_likelihood of ``self.kernel_.theta`` is returned." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the gradient of the log-marginal likelihood with respect to the kernel hyperparameters at position theta is returned additionally. If True, theta must not be None." 
- }, - { - "name": "clone_kernel", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, the kernel attribute is copied. If False, the kernel attribute is modified, but may result in a performance improvement." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns log-marginal likelihood of theta for training data.\n\nParameters\n----------\ntheta : array-like of shape (n_kernel_params,), default=None\n Kernel hyperparameters for which the log-marginal likelihood is\n evaluated. If None, the precomputed log_marginal_likelihood\n of ``self.kernel_.theta`` is returned.\n\neval_gradient : bool, default=False\n If True, the gradient of the log-marginal likelihood with respect\n to the kernel hyperparameters at position theta is returned\n additionally. If True, theta must not be None.\n\nclone_kernel : bool, default=True\n If True, the kernel attribute is copied. If False, the kernel\n attribute is modified, but may result in a performance improvement.\n\nReturns\n-------\nlog_likelihood : float\n Log-marginal likelihood of theta for training data.\n\nlog_likelihood_gradient : ndarray of shape (n_kernel_params,), optional\n Gradient of the log-marginal likelihood with respect to the kernel\n hyperparameters at position theta.\n Only returned when `eval_gradient` is True." - }, - { - "name": "_posterior_mode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mode-finding for binary Laplace GPC and fixed kernel.\n\nThis approximates the posterior of the latent function values for given\ninputs and target observations with a Gaussian approximation and uses\nNewton's iteration to find the mode of this approximation." 
- }, - { - "name": "_constrained_optimization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Binary Gaussian process classification based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 of\n``Gaussian Processes for Machine Learning'' (GPML) by Rasmussen and\nWilliams.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernel : kernel instance, default=None\n The kernel specifying the covariance function of the GP. If None is\n passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\n the kernel's hyperparameters are optimized during fitting.\n\noptimizer : 'fmin_l_bfgs_b' or callable, default='fmin_l_bfgs_b'\n Can either be one of the internally supported optimizers for optimizing\n the kernel's parameters, specified by a string, or an externally\n defined optimizer passed as a callable. If a callable is passed, it\n must have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func' is the objective function to be maximized, which\n # takes the hyperparameters theta as parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\n Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n is used. 
If None is passed, the kernel's parameters are kept fixed.\n Available internal optimizers are::\n\n 'fmin_l_bfgs_b'\n\nn_restarts_optimizer : int, default=0\n The number of restarts of the optimizer for finding the kernel's\n parameters which maximize the log-marginal likelihood. The first run\n of the optimizer is performed from the kernel's initial parameters,\n the remaining ones (if any) from thetas sampled log-uniform randomly\n from the space of allowed theta-values. If greater than 0, all bounds\n must be finite. Note that n_restarts_optimizer=0 implies that one\n run is performed.\n\nmax_iter_predict : int, default=100\n The maximum number of iterations in Newton's method for approximating\n the posterior during predict. Smaller values will reduce computation\n time at the cost of worse results.\n\nwarm_start : bool, default=False\n If warm-starts are enabled, the solution of the last Newton iteration\n on the Laplace approximation of the posterior mode is used as\n initialization for the next call of _posterior_mode(). This can speed\n up convergence when _posterior_mode is called several times on similar\n problems as in hyperparameter optimization. See :term:`the Glossary\n `.\n\ncopy_X_train : bool, default=True\n If True, a persistent copy of the training data is stored in the\n object. 
Otherwise, just a reference to the training data is stored,\n which might cause predictions to change if the data is modified\n externally.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nAttributes\n----------\nX_train_ : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data (also\n required for prediction).\n\ny_train_ : array-like of shape (n_samples,)\n Target values in training data (also required for prediction)\n\nclasses_ : array-like of shape (n_classes,)\n Unique class labels.\n\nkernel_ : kernl instance\n The kernel used for prediction. The structure of the kernel is the\n same as the one passed as parameter but with optimized hyperparameters\n\nL_ : array-like of shape (n_samples, n_samples)\n Lower-triangular Cholesky decomposition of the kernel in X_train_\n\npi_ : array-like of shape (n_samples,)\n The probabilities of the positive class for the training points\n X_train_\n\nW_sr_ : array-like of shape (n_samples,)\n Square root of W, the Hessian of log-likelihood of the latent function\n values for the observed labels. Since W is diagonal, only the diagonal\n of sqrt(W) is stored.\n\nlog_marginal_likelihood_value_ : float\n The log-marginal-likelihood of ``self.kernel_.theta``" - }, - { - "name": "GaussianProcessClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The kernel specifying the covariance function of the GP. If None is passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that the kernel's hyperparameters are optimized during fitting." 
- }, - { - "name": "optimizer", - "type": "Union[Callable, Literal['fmin_l_bfgs_b']]", - "hasDefault": true, - "default": "'fmin_l_bfgs_b'", - "limitation": null, - "ignored": false, - "docstring": "Can either be one of the internally supported optimizers for optimizing the kernel's parameters, specified by a string, or an externally defined optimizer passed as a callable. If a callable is passed, it must have the signature:: def optimizer(obj_func, initial_theta, bounds): # * 'obj_func' is the objective function to be maximized, which # takes the hyperparameters theta as parameter and an # optional flag eval_gradient, which determines if the # gradient is returned additionally to the function value # * 'initial_theta': the initial value for theta, which can be # used by local optimizers # * 'bounds': the bounds on the values of theta .... # Returned are the best found hyperparameters theta and # the corresponding value of the target function. return theta_opt, func_min Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize is used. If None is passed, the kernel's parameters are kept fixed. Available internal optimizers are:: 'fmin_l_bfgs_b'" - }, - { - "name": "n_restarts_optimizer", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The number of restarts of the optimizer for finding the kernel's parameters which maximize the log-marginal likelihood. The first run of the optimizer is performed from the kernel's initial parameters, the remaining ones (if any) from thetas sampled log-uniform randomly from the space of allowed theta-values. If greater than 0, all bounds must be finite. Note that n_restarts_optimizer=0 implies that one run is performed." 
- }, - { - "name": "max_iter_predict", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations in Newton's method for approximating the posterior during predict. Smaller values will reduce computation time at the cost of worse results." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If warm-starts are enabled, the solution of the last Newton iteration on the Laplace approximation of the posterior mode is used as initialization for the next call of _posterior_mode(). This can speed up convergence when _posterior_mode is called several times on similar problems as in hyperparameter optimization. See :term:`the Glossary `." - }, - { - "name": "copy_X_train", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, a persistent copy of the training data is stored in the object. Otherwise, just a reference to the training data is stored, which might cause predictions to change if the data is modified externally." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation used to initialize the centers. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - }, - { - "name": "multi_class", - "type": "Literal['one_vs_rest', 'one_vs_one']", - "hasDefault": true, - "default": "'one_vs_rest'", - "limitation": null, - "ignored": false, - "docstring": "Specifies how multi-class classification problems are handled. Supported are 'one_vs_rest' and 'one_vs_one'. In 'one_vs_rest', one binary Gaussian process classifier is fitted for each class, which is trained to separate this class from the rest. 
In 'one_vs_one', one binary Gaussian process classifier is fitted for each pair of classes, which is trained to separate these two classes. The predictions of these binary predictors are combined into multi-class predictions. Note that 'one_vs_one' does not support predicting probability estimates." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation: the specified multiclass problems are computed in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Feature vectors or other representations of training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values, must be binary" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Gaussian process classification model\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data.\n\ny : array-like of shape (n_samples,)\n Target values, must be binary\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Query points where the GP is evaluated for classification." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on an array of test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n Predicted target values for X, values are from ``classes_``" - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Query points where the GP is evaluated for classification." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return probability estimates for the test vector X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\nReturns\n-------\nC : array-like of shape (n_samples, n_classes)\n Returns the probability of the samples for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`." - }, - { - "name": "kernel_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "log_marginal_likelihood", - "decorators": [], - "parameters": [ - { - "name": "theta", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Kernel hyperparameters for which the log-marginal likelihood is evaluated. In the case of multi-class classification, theta may be the hyperparameters of the compound kernel or of an individual kernel. In the latter case, all individual kernel get assigned the same theta values. If None, the precomputed log_marginal_likelihood of ``self.kernel_.theta`` is returned." 
- }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the gradient of the log-marginal likelihood with respect to the kernel hyperparameters at position theta is returned additionally. Note that gradient computation is not supported for non-binary classification. If True, theta must not be None." - }, - { - "name": "clone_kernel", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, the kernel attribute is copied. If False, the kernel attribute is modified, but may result in a performance improvement." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns log-marginal likelihood of theta for training data.\n\nIn the case of multi-class classification, the mean log-marginal\nlikelihood of the one-versus-rest classifiers are returned.\n\nParameters\n----------\ntheta : array-like of shape (n_kernel_params,), default=None\n Kernel hyperparameters for which the log-marginal likelihood is\n evaluated. In the case of multi-class classification, theta may\n be the hyperparameters of the compound kernel or of an individual\n kernel. In the latter case, all individual kernel get assigned the\n same theta values. If None, the precomputed log_marginal_likelihood\n of ``self.kernel_.theta`` is returned.\n\neval_gradient : bool, default=False\n If True, the gradient of the log-marginal likelihood with respect\n to the kernel hyperparameters at position theta is returned\n additionally. Note that gradient computation is not supported\n for non-binary classification. If True, theta must not be None.\n\nclone_kernel : bool, default=True\n If True, the kernel attribute is copied. 
If False, the kernel\n attribute is modified, but may result in a performance improvement.\n\nReturns\n-------\nlog_likelihood : float\n Log-marginal likelihood of theta for training data.\n\nlog_likelihood_gradient : ndarray of shape (n_kernel_params,), optional\n Gradient of the log-marginal likelihood with respect to the kernel\n hyperparameters at position theta.\n Only returned when `eval_gradient` is True." - } - ], - "docstring": "Gaussian process classification (GPC) based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 of\nGaussian Processes for Machine Learning (GPML) by Rasmussen and\nWilliams.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction. For multi-class classification, several binary one-versus rest\nclassifiers are fitted. Note that this class thus does not implement\na true multi-class Laplace approximation.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nkernel : kernel instance, default=None\n The kernel specifying the covariance function of the GP. If None is\n passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\n the kernel's hyperparameters are optimized during fitting.\n\noptimizer : 'fmin_l_bfgs_b' or callable, default='fmin_l_bfgs_b'\n Can either be one of the internally supported optimizers for optimizing\n the kernel's parameters, specified by a string, or an externally\n defined optimizer passed as a callable. 
If a callable is passed, it\n must have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func' is the objective function to be maximized, which\n # takes the hyperparameters theta as parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\n Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n is used. If None is passed, the kernel's parameters are kept fixed.\n Available internal optimizers are::\n\n 'fmin_l_bfgs_b'\n\nn_restarts_optimizer : int, default=0\n The number of restarts of the optimizer for finding the kernel's\n parameters which maximize the log-marginal likelihood. The first run\n of the optimizer is performed from the kernel's initial parameters,\n the remaining ones (if any) from thetas sampled log-uniform randomly\n from the space of allowed theta-values. If greater than 0, all bounds\n must be finite. Note that n_restarts_optimizer=0 implies that one\n run is performed.\n\nmax_iter_predict : int, default=100\n The maximum number of iterations in Newton's method for approximating\n the posterior during predict. Smaller values will reduce computation\n time at the cost of worse results.\n\nwarm_start : bool, default=False\n If warm-starts are enabled, the solution of the last Newton iteration\n on the Laplace approximation of the posterior mode is used as\n initialization for the next call of _posterior_mode(). This can speed\n up convergence when _posterior_mode is called several times on similar\n problems as in hyperparameter optimization. 
See :term:`the Glossary\n `.\n\ncopy_X_train : bool, default=True\n If True, a persistent copy of the training data is stored in the\n object. Otherwise, just a reference to the training data is stored,\n which might cause predictions to change if the data is modified\n externally.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nmulti_class : {'one_vs_rest', 'one_vs_one'}, default='one_vs_rest'\n Specifies how multi-class classification problems are handled.\n Supported are 'one_vs_rest' and 'one_vs_one'. In 'one_vs_rest',\n one binary Gaussian process classifier is fitted for each class, which\n is trained to separate this class from the rest. In 'one_vs_one', one\n binary Gaussian process classifier is fitted for each pair of classes,\n which is trained to separate these two classes. The predictions of\n these binary predictors are combined into multi-class predictions.\n Note that 'one_vs_one' does not support predicting probability\n estimates.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation: the specified\n multiclass problems are computed in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nbase_estimator_ : ``Estimator`` instance\n The estimator instance that defines the likelihood function\n using the observed data.\n\nkernel_ : kernel instance\n The kernel used for prediction. In case of binary classification,\n the structure of the kernel is the same as the one passed as parameter\n but with optimized hyperparameters. 
In case of multi-class\n classification, a CompoundKernel is returned which consists of the\n different kernels used in the one-versus-rest classifiers.\n\nlog_marginal_likelihood_value_ : float\n The log-marginal-likelihood of ``self.kernel_.theta``\n\nclasses_ : array-like of shape (n_classes,)\n Unique class labels.\n\nn_classes_ : int\n The number of classes in the training data\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import RBF\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = 1.0 * RBF(1.0)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9866...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.83548752, 0.03228706, 0.13222543],\n [0.79064206, 0.06525643, 0.14410151]])\n\n.. versionadded:: 0.18" - } - ], - "functions": [] - }, - { - "name": "sklearn.gaussian_process._gpr", - "imports": [ - "import warnings", - "from operator import itemgetter", - "import numpy as np", - "from scipy.linalg import cholesky", - "from scipy.linalg import cho_solve", - "from scipy.linalg import solve_triangular", - "import scipy.optimize", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from base import clone", - "from base import MultiOutputMixin", - "from kernels import RBF", - "from kernels import ConstantKernel as C", - "from utils import check_random_state", - "from utils.validation import check_array", - "from utils.optimize import _check_optimize_result", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "GaussianProcessRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The kernel specifying the 
covariance function of the GP. If None is passed, the kernel ``ConstantKernel(1.0, constant_value_bounds=\"fixed\" * RBF(1.0, length_scale_bounds=\"fixed\")`` is used as default. Note that the kernel hyperparameters are optimized during fitting unless the bounds are marked as \"fixed\"." - }, - { - "name": "alpha", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "1e-10", - "limitation": null, - "ignored": false, - "docstring": "Value added to the diagonal of the kernel matrix during fitting. This can prevent a potential numerical issue during fitting, by ensuring that the calculated values form a positive definite matrix. It can also be interpreted as the variance of additional Gaussian measurement noise on the training observations. Note that this is different from using a `WhiteKernel`. If an array is passed, it must have the same number of entries as the data used for fitting and is used as datapoint-dependent noise level. Allowing to specify the noise level directly as a parameter is mainly for convenience and for consistency with Ridge." - }, - { - "name": "optimizer", - "type": "Union[Callable, Literal[\"fmin_l_bfgs_b\"]]", - "hasDefault": true, - "default": "\"fmin_l_bfgs_b\"", - "limitation": null, - "ignored": false, - "docstring": "Can either be one of the internally supported optimizers for optimizing the kernel's parameters, specified by a string, or an externally defined optimizer passed as a callable. If a callable is passed, it must have the signature:: def optimizer(obj_func, initial_theta, bounds): # * 'obj_func' is the objective function to be minimized, which # takes the hyperparameters theta as parameter and an # optional flag eval_gradient, which determines if the # gradient is returned additionally to the function value # * 'initial_theta': the initial value for theta, which can be # used by local optimizers # * 'bounds': the bounds on the values of theta .... 
# Returned are the best found hyperparameters theta and # the corresponding value of the target function. return theta_opt, func_min Per default, the 'L-BGFS-B' algorithm from scipy.optimize.minimize is used. If None is passed, the kernel's parameters are kept fixed. Available internal optimizers are:: 'fmin_l_bfgs_b'" - }, - { - "name": "n_restarts_optimizer", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The number of restarts of the optimizer for finding the kernel's parameters which maximize the log-marginal likelihood. The first run of the optimizer is performed from the kernel's initial parameters, the remaining ones (if any) from thetas sampled log-uniform randomly from the space of allowed theta-values. If greater than 0, all bounds must be finite. Note that n_restarts_optimizer == 0 implies that one run is performed." - }, - { - "name": "normalize_y", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether the target values y are normalized, the mean and variance of the target values are set equal to 0 and 1 respectively. This is recommended for cases where zero-mean, unit-variance priors are used. Note that, in this implementation, the normalisation is reversed before the GP predictions are reported. .. versionchanged:: 0.23" - }, - { - "name": "copy_X_train", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, a persistent copy of the training data is stored in the object. Otherwise, just a reference to the training data is stored, which might cause predictions to change if the data is modified externally." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation used to initialize the centers. 
Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Feature vectors or other representations of training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Gaussian process regression model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Query points where the GP is evaluated." - }, - { - "name": "return_std", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the standard-deviation of the predictive distribution at the query points is returned along with the mean." - }, - { - "name": "return_cov", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the covariance of the joint predictive distribution at the query points is returned along with the mean." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the Gaussian process regression model\n\nWe can also predict based on an unfitted model by using the GP prior.\nIn addition to the mean of the predictive distribution, also its\nstandard deviation (return_std=True) or covariance (return_cov=True).\nNote that at most one of the two can be requested.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated.\n\nreturn_std : bool, default=False\n If True, the standard-deviation of the predictive distribution at\n the query points is returned along with the mean.\n\nreturn_cov : bool, default=False\n If True, the covariance of the joint predictive distribution at\n the query points is returned along with the mean.\n\nReturns\n-------\ny_mean : ndarray of shape (n_samples, [n_output_dims])\n Mean of predictive distribution a query points.\n\ny_std : ndarray of shape (n_samples,), optional\n Standard deviation of predictive distribution at query points.\n Only returned when `return_std` is True.\n\ny_cov : ndarray of shape (n_samples, n_samples), optional\n Covariance of joint predictive distribution a query points.\n Only returned when `return_cov` is True." - }, - { - "name": "sample_y", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Query points where the GP is evaluated." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of samples drawn from the Gaussian process" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation to randomly draw samples. 
Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Draw samples from Gaussian process and evaluate at X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated.\n\nn_samples : int, default=1\n The number of samples drawn from the Gaussian process\n\nrandom_state : int, RandomState instance or None, default=0\n Determines random number generation to randomly draw samples.\n Pass an int for reproducible results across multiple function\n calls.\n See :term: `Glossary `.\n\nReturns\n-------\ny_samples : ndarray of shape (n_samples_X, [n_output_dims], n_samples)\n Values of n_samples samples drawn from Gaussian process and\n evaluated at query points." - }, - { - "name": "log_marginal_likelihood", - "decorators": [], - "parameters": [ - { - "name": "theta", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Kernel hyperparameters for which the log-marginal likelihood is evaluated. If None, the precomputed log_marginal_likelihood of ``self.kernel_.theta`` is returned." - }, - { - "name": "eval_gradient", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the gradient of the log-marginal likelihood with respect to the kernel hyperparameters at position theta is returned additionally. If True, theta must not be None." - }, - { - "name": "clone_kernel", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, the kernel attribute is copied. If False, the kernel attribute is modified, but may result in a performance improvement." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns log-marginal likelihood of theta for training data.\n\nParameters\n----------\ntheta : array-like of shape (n_kernel_params,) default=None\n Kernel hyperparameters for which the log-marginal likelihood is\n evaluated. If None, the precomputed log_marginal_likelihood\n of ``self.kernel_.theta`` is returned.\n\neval_gradient : bool, default=False\n If True, the gradient of the log-marginal likelihood with respect\n to the kernel hyperparameters at position theta is returned\n additionally. If True, theta must not be None.\n\nclone_kernel : bool, default=True\n If True, the kernel attribute is copied. If False, the kernel\n attribute is modified, but may result in a performance improvement.\n\nReturns\n-------\nlog_likelihood : float\n Log-marginal likelihood of theta for training data.\n\nlog_likelihood_gradient : ndarray of shape (n_kernel_params,), optional\n Gradient of the log-marginal likelihood with respect to the kernel\n hyperparameters at position theta.\n Only returned when eval_gradient is True." 
- }, - { - "name": "_constrained_optimization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Gaussian process regression (GPR).\n\nThe implementation is based on Algorithm 2.1 of Gaussian Processes\nfor Machine Learning (GPML) by Rasmussen and Williams.\n\nIn addition to standard scikit-learn estimator API,\nGaussianProcessRegressor:\n\n * allows prediction without prior fitting (based on the GP prior)\n * provides an additional method sample_y(X), which evaluates samples\n drawn from the GPR (prior or posterior) at given inputs\n * exposes a method log_marginal_likelihood(theta), which can be used\n externally for other ways of selecting hyperparameters, e.g., via\n Markov chain Monte Carlo.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernel : kernel instance, default=None\n The kernel specifying the covariance function of the GP. If None is\n passed, the kernel ``ConstantKernel(1.0, constant_value_bounds=\"fixed\"\n * RBF(1.0, length_scale_bounds=\"fixed\")`` is used as default. Note that\n the kernel hyperparameters are optimized during fitting unless the\n bounds are marked as \"fixed\".\n\nalpha : float or ndarray of shape (n_samples,), default=1e-10\n Value added to the diagonal of the kernel matrix during fitting.\n This can prevent a potential numerical issue during fitting, by\n ensuring that the calculated values form a positive definite matrix.\n It can also be interpreted as the variance of additional Gaussian\n measurement noise on the training observations. Note that this is\n different from using a `WhiteKernel`. If an array is passed, it must\n have the same number of entries as the data used for fitting and is\n used as datapoint-dependent noise level. 
Allowing to specify the\n noise level directly as a parameter is mainly for convenience and\n for consistency with Ridge.\n\noptimizer : \"fmin_l_bfgs_b\" or callable, default=\"fmin_l_bfgs_b\"\n Can either be one of the internally supported optimizers for optimizing\n the kernel's parameters, specified by a string, or an externally\n defined optimizer passed as a callable. If a callable is passed, it\n must have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func' is the objective function to be minimized, which\n # takes the hyperparameters theta as parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\n Per default, the 'L-BGFS-B' algorithm from scipy.optimize.minimize\n is used. If None is passed, the kernel's parameters are kept fixed.\n Available internal optimizers are::\n\n 'fmin_l_bfgs_b'\n\nn_restarts_optimizer : int, default=0\n The number of restarts of the optimizer for finding the kernel's\n parameters which maximize the log-marginal likelihood. The first run\n of the optimizer is performed from the kernel's initial parameters,\n the remaining ones (if any) from thetas sampled log-uniform randomly\n from the space of allowed theta-values. If greater than 0, all bounds\n must be finite. Note that n_restarts_optimizer == 0 implies that one\n run is performed.\n\nnormalize_y : bool, default=False\n Whether the target values y are normalized, the mean and variance of\n the target values are set equal to 0 and 1 respectively. 
This is\n recommended for cases where zero-mean, unit-variance priors are used.\n Note that, in this implementation, the normalisation is reversed\n before the GP predictions are reported.\n\n .. versionchanged:: 0.23\n\ncopy_X_train : bool, default=True\n If True, a persistent copy of the training data is stored in the\n object. Otherwise, just a reference to the training data is stored,\n which might cause predictions to change if the data is modified\n externally.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nAttributes\n----------\nX_train_ : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data (also\n required for prediction).\n\ny_train_ : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values in training data (also required for prediction)\n\nkernel_ : kernel instance\n The kernel used for prediction. The structure of the kernel is the\n same as the one passed as parameter but with optimized hyperparameters\n\nL_ : array-like of shape (n_samples, n_samples)\n Lower-triangular Cholesky decomposition of the kernel in ``X_train_``\n\nalpha_ : array-like of shape (n_samples,)\n Dual coefficients of training data points in kernel space\n\nlog_marginal_likelihood_value_ : float\n The log-marginal-likelihood of ``self.kernel_.theta``\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = DotProduct() + WhiteKernel()\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... 
random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3680...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([653.0..., 592.1...]), array([316.6..., 316.6...]))" - } - ], - "functions": [] - }, - { - "name": "sklearn.gaussian_process", - "imports": [ - "from _gpr import GaussianProcessRegressor", - "from _gpc import GaussianProcessClassifier", - "from None import kernels" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.gaussian_process.tests.test_gpc", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy.optimize import approx_fprime", - "import pytest", - "from sklearn.gaussian_process import GaussianProcessClassifier", - "from sklearn.gaussian_process.kernels import RBF", - "from sklearn.gaussian_process.kernels import ConstantKernel as C", - "from sklearn.gaussian_process.kernels import WhiteKernel", - "from sklearn.gaussian_process.tests._mini_sequence_kernel import MiniSeqKernel", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_warns_message" - ], - "classes": [], - "functions": [ - { - "name": "f", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_consistent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_consistent_structured", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lml_improving", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lml_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_lml_without_cloning_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_converged_to_local_maximum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lml_gradient", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_starts", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_custom_optimizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_class_n_jobs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warning_bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.gaussian_process.tests.test_gpr", - "imports": [ - "import sys", - "import numpy as np", - "import warnings", - "from scipy.optimize import approx_fprime", - "import pytest", - "from sklearn.gaussian_process import GaussianProcessRegressor", - "from sklearn.gaussian_process.kernels import RBF", - "from sklearn.gaussian_process.kernels import ConstantKernel as C", - "from sklearn.gaussian_process.kernels import WhiteKernel", - "from sklearn.gaussian_process.kernels import DotProduct", - "from sklearn.gaussian_process.kernels import ExpSineSquared", - "from sklearn.gaussian_process.tests._mini_sequence_kernel import MiniSeqKernel", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.utils._testing import assert_array_less", 
- "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_warns_message" - ], - "classes": [], - "functions": [ - { - "name": "f", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gpr_interpolation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gpr_interpolation_structured", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lml_improving", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lml_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lml_without_cloning_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_converged_to_local_maximum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_solution_inside_bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lml_gradient", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_statistics", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_optimizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_cov_vs_std", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_anisotropic_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_starts", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_y_normalization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test normalization of the target values in GP\n\nFitting non-normalizing GP on normalized y and fitting normalizing GP\non unnormalized y should yield identical results. Note that, here,\n'normalized y' refers to y that has been made zero mean and unit\nvariance." - }, - { - "name": "test_large_variance_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Here we test that, when noramlize_y=True, our GP can produce a\nsensible fit to training data whose variance is significantly\nlarger than unity. This test was made in response to issue #15612.\n\nGP predictions are verified against predictions that were made\nusing GPy which, here, is treated as the 'gold standard'. 
Note that we\nonly investigate the RBF kernel here, as that is what was used in the\nGPy implementation.\n\nThe following code can be used to recreate the GPy data:\n\n--------------------------------------------------------------------------\nimport GPy\n\nkernel_gpy = GPy.kern.RBF(input_dim=1, lengthscale=1.)\ngpy = GPy.models.GPRegression(X, np.vstack(y_large), kernel_gpy)\ngpy.optimize()\ny_pred_gpy, y_var_gpy = gpy.predict(X2)\ny_pred_std_gpy = np.sqrt(y_var_gpy)\n--------------------------------------------------------------------------" - }, - { - "name": "test_y_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_custom_optimizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gpr_correct_error_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_duplicate_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_fit_default_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_K_inv_reset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warning_bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bound_check_fixed_hyperparameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.gaussian_process.tests.test_kernels", - "imports": [ - "import pytest", - "import numpy as np", - "from inspect import signature", - "from 
sklearn.gaussian_process.kernels import _approx_fprime", - "from sklearn.metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS", - "from sklearn.metrics.pairwise import euclidean_distances", - "from sklearn.metrics.pairwise import pairwise_kernels", - "from sklearn.gaussian_process.kernels import RBF", - "from sklearn.gaussian_process.kernels import Matern", - "from sklearn.gaussian_process.kernels import RationalQuadratic", - "from sklearn.gaussian_process.kernels import ExpSineSquared", - "from sklearn.gaussian_process.kernels import DotProduct", - "from sklearn.gaussian_process.kernels import ConstantKernel", - "from sklearn.gaussian_process.kernels import WhiteKernel", - "from sklearn.gaussian_process.kernels import PairwiseKernel", - "from sklearn.gaussian_process.kernels import KernelOperator", - "from sklearn.gaussian_process.kernels import Exponentiation", - "from sklearn.gaussian_process.kernels import CompoundKernel", - "from sklearn.base import clone", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_raise_message" - ], - "classes": [], - "functions": [ - { - "name": "test_kernel_gradient", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_theta", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_auto_vs_cross", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_diag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_operator_commutative", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_anisotropic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_stationary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_input_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compound_kernel_input_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_hyperparameters_equal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_clone", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_clone_after_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_matern_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_versus_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_repr_kernels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rational_quadratic_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": 
"sklearn.gaussian_process.tests._mini_sequence_kernel", - "imports": [ - "from sklearn.gaussian_process.kernels import Kernel", - "from sklearn.gaussian_process.kernels import Hyperparameter", - "from sklearn.gaussian_process.kernels import GenericKernelMixin", - "from sklearn.gaussian_process.kernels import StationaryKernelMixin", - "import numpy as np", - "from sklearn.base import clone" - ], - "classes": [ - { - "name": "MiniSeqKernel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "hyperparameter_baseline_similarity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_f", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_g", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "diag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "clone_with_theta", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A minimal (but valid) convolutional kernel for sequences of variable\nlength." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.gaussian_process.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.impute._base", - "imports": [ - "import numbers", - "import warnings", - "from collections import Counter", - "import numpy as np", - "import numpy.ma as ma", - "from scipy import sparse as sp", - "from scipy import stats", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils.sparsefuncs import _get_median", - "from utils.validation import check_is_fitted", - "from utils.validation import FLOAT_DTYPES", - "from utils.validation import _deprecate_positional_args", - "from utils._mask import _get_mask", - "from utils import is_scalar_nan" - ], - "classes": [ - { - "name": "_BaseImputer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_indicator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit a MissingIndicator." - }, - { - "name": "_transform_indicator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the indicator mask.'\n\nNote that X must be the original data as passed to the imputer before\nany imputation, since imputation may be done inplace in some cases." - }, - { - "name": "_concatenate_indicator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Concatenate indicator mask with the imputed data." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for all imputers.\n\nIt adds automatically support for `add_indicator`." 
- }, - { - "name": "SimpleImputer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "missing_values", - "type": "Optional[Union[float, int, str]]", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "The placeholder for the missing values. All occurrences of `missing_values` will be imputed. For pandas' dataframes with nullable integer dtypes with missing values, `missing_values` should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`." - }, - { - "name": "strategy", - "type": "str", - "hasDefault": true, - "default": "'mean'", - "limitation": null, - "ignored": false, - "docstring": "The imputation strategy. - If \"mean\", then replace missing values using the mean along each column. Can only be used with numeric data. - If \"median\", then replace missing values using the median along each column. Can only be used with numeric data. - If \"most_frequent\", then replace missing using the most frequent value along each column. Can be used with strings or numeric data. If there is more than one such value, only the smallest is returned. - If \"constant\", then replace missing values with fill_value. Can be used with strings or numeric data. .. versionadded:: 0.20 strategy=\"constant\" for fixed value imputation." - }, - { - "name": "fill_value", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "When strategy == \"constant\", fill_value is used to replace all occurrences of missing_values. If left to the default, fill_value will be 0 when imputing numerical data and \"missing_value\" for strings or object data types." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity of the imputer." 
- }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, a copy of X will be created. If False, imputation will be done in-place whenever possible. Note that, in the following cases, a new copy will always be made, even if `copy=False`: - If X is not an array of floating values; - If X is encoded as a CSR matrix; - If add_indicator=True." - }, - { - "name": "add_indicator", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, a :class:`MissingIndicator` transform will stack onto output of the imputer's transform. This allows a predictive estimator to account for missingness despite imputation. If a feature has no missing values at fit/train time, the feature won't appear on the missing indicator even if there are missing values at transform/test time." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where ``n_samples`` is the number of samples and ``n_features`` is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the imputer on X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n\nReturns\n-------\nself : SimpleImputer" - }, - { - "name": "_sparse_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the transformer on sparse data." 
- }, - { - "name": "_dense_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the transformer on dense data." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data to complete." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Impute all missing values in X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data to complete." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The imputed data to be reverted to original data. It has to be an augmented array of imputed data and the missing indicator mask." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convert the data back to the original representation.\n\nInverts the `transform` operation performed on an array.\nThis operation can only be performed after :class:`SimpleImputer` is\ninstantiated with `add_indicator=True`.\n\nNote that ``inverse_transform`` can only invert the transform in\nfeatures that have binary indicators for missing values. If a feature\nhas no missing values at ``fit`` time, the feature won't have a binary\nindicator, and the imputation done at ``transform`` time won't be\ninverted.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features + n_features_missing_indicator)\n The imputed data to be reverted to original data. It has to be\n an augmented array of imputed data and the missing indicator mask.\n\nReturns\n-------\nX_original : ndarray of shape (n_samples, n_features)\n The original X with missing values as it was prior\n to imputation." 
- } - ], - "docstring": "Imputation transformer for completing missing values.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n `SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\n estimator which is now removed.\n\nParameters\n----------\nmissing_values : int, float, str, np.nan or None, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\nstrategy : string, default='mean'\n The imputation strategy.\n\n - If \"mean\", then replace missing values using the mean along\n each column. Can only be used with numeric data.\n - If \"median\", then replace missing values using the median along\n each column. Can only be used with numeric data.\n - If \"most_frequent\", then replace missing using the most frequent\n value along each column. Can be used with strings or numeric data.\n If there is more than one such value, only the smallest is returned.\n - If \"constant\", then replace missing values with fill_value. Can be\n used with strings or numeric data.\n\n .. versionadded:: 0.20\n strategy=\"constant\" for fixed value imputation.\n\nfill_value : string or numerical value, default=None\n When strategy == \"constant\", fill_value is used to replace all\n occurrences of missing_values.\n If left to the default, fill_value will be 0 when imputing numerical\n data and \"missing_value\" for strings or object data types.\n\nverbose : integer, default=0\n Controls the verbosity of the imputer.\n\ncopy : boolean, default=True\n If True, a copy of X will be created. If False, imputation will\n be done in-place whenever possible. 
Note that, in the following cases,\n a new copy will always be made, even if `copy=False`:\n\n - If X is not an array of floating values;\n - If X is encoded as a CSR matrix;\n - If add_indicator=True.\n\nadd_indicator : boolean, default=False\n If True, a :class:`MissingIndicator` transform will stack onto output\n of the imputer's transform. This allows a predictive estimator\n to account for missingness despite imputation. If a feature has no\n missing values at fit/train time, the feature won't appear on\n the missing indicator even if there are missing values at\n transform/test time.\n\nAttributes\n----------\nstatistics_ : array of shape (n_features,)\n The imputation fill value for each feature.\n Computing statistics can result in `np.nan` values.\n During :meth:`transform`, features corresponding to `np.nan`\n statistics will be discarded.\n\nindicator_ : :class:`~sklearn.impute.MissingIndicator`\n Indicator used to add binary indicators for missing values.\n ``None`` if add_indicator is False.\n\nSee Also\n--------\nIterativeImputer : Multivariate imputation of missing values.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.impute import SimpleImputer\n>>> imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')\n>>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\nSimpleImputer()\n>>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n>>> print(imp_mean.transform(X))\n[[ 7. 2. 3. ]\n [ 4. 3.5 6. ]\n [10. 3.5 9. ]]\n\nNotes\n-----\nColumns which only contained missing values at :meth:`fit` are discarded\nupon :meth:`transform` if strategy is not \"constant\"." 
- }, - { - "name": "MissingIndicator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "missing_values", - "type": "Optional[Union[float, int, str]]", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "The placeholder for the missing values. All occurrences of `missing_values` will be imputed. For pandas' dataframes with nullable integer dtypes with missing values, `missing_values` should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`." - }, - { - "name": "features", - "type": "Literal['missing-only', 'all']", - "hasDefault": true, - "default": "'missing-only'", - "limitation": null, - "ignored": false, - "docstring": "Whether the imputer mask should represent all or a subset of features. - If 'missing-only' (default), the imputer mask will only represent features containing missing values during fit time. - If 'all', the imputer mask will represent all features." - }, - { - "name": "sparse", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether the imputer mask format should be sparse or dense. - If 'auto' (default), the imputer mask will be of same type as input. - If True, the imputer mask will be a sparse matrix. - If False, the imputer mask will be a numpy array." - }, - { - "name": "error_on_new", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, transform will raise an error when there are features with missing values in transform that have no missing values in fit. This is applicable only when `features='missing-only'`." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_missing_features_info", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data with missing values. Note that ``X`` has been checked in ``fit`` and ``transform`` before to call this function." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the imputer mask and the indices of the features\ncontaining missing values.\n\nParameters\n----------\nX : {ndarray or sparse matrix}, shape (n_samples, n_features)\n The input data with missing values. Note that ``X`` has been\n checked in ``fit`` and ``transform`` before to call this function.\n\nReturns\n-------\nimputer_mask : {ndarray or sparse matrix}, shape (n_samples, n_features)\n The imputer mask of the original data.\n\nfeatures_with_missing : ndarray, shape (n_features_with_missing)\n The features containing missing values." - }, - { - "name": "_validate_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where ``n_samples`` is the number of samples and ``n_features`` is the number of features. If `precomputed` is True, then `X` is a mask of the input data." - }, - { - "name": "precomputed", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether the input data is a mask." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the transformer on X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n If `precomputed` is True, then `X` is a mask of the\n input data.\n\nprecomputed : bool\n Whether the input data is a mask.\n\nReturns\n-------\nimputer_mask : {ndarray or sparse matrix}, shape (n_samples, n_features)\n The imputer mask of the original data." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where ``n_samples`` is the number of samples and ``n_features`` is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the transformer on X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n\nReturns\n-------\nself : object\n Returns self." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data to complete." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate missing values indicator for X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data to complete.\n\nReturns\n-------\nXt : {ndarray or sparse matrix}, shape (n_samples, n_features) or (n_samples, n_features_with_missing)\n The missing indicator for input data. The data type of ``Xt``\n will be boolean." 
- }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data to complete." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate missing values indicator for X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data to complete.\n\nReturns\n-------\nXt : {ndarray or sparse matrix}, shape (n_samples, n_features) or (n_samples, n_features_with_missing)\n The missing indicator for input data. The data type of ``Xt``\n will be boolean." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Binary indicators for missing values.\n\nNote that this component typically should not be used in a vanilla\n:class:`Pipeline` consisting of transformers and a classifier, but rather\ncould be added using a :class:`FeatureUnion` or :class:`ColumnTransformer`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nmissing_values : int, float, string, np.nan or None, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. 
For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\nfeatures : {'missing-only', 'all'}, default='missing-only'\n Whether the imputer mask should represent all or a subset of\n features.\n\n - If 'missing-only' (default), the imputer mask will only represent\n features containing missing values during fit time.\n - If 'all', the imputer mask will represent all features.\n\nsparse : bool or 'auto', default='auto'\n Whether the imputer mask format should be sparse or dense.\n\n - If 'auto' (default), the imputer mask will be of same type as\n input.\n - If True, the imputer mask will be a sparse matrix.\n - If False, the imputer mask will be a numpy array.\n\nerror_on_new : bool, default=True\n If True, transform will raise an error when there are features with\n missing values in transform that have no missing values in fit. This is\n applicable only when `features='missing-only'`.\n\nAttributes\n----------\nfeatures_ : ndarray, shape (n_missing_features,) or (n_features,)\n The features indices which will be returned when calling ``transform``.\n They are computed during ``fit``. For ``features='all'``, it is\n to ``range(n_features)``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.impute import MissingIndicator\n>>> X1 = np.array([[np.nan, 1, 3],\n... [4, 0, np.nan],\n... [8, 1, 0]])\n>>> X2 = np.array([[5, 1, np.nan],\n... [np.nan, 2, 3],\n... 
[2, 4, 0]])\n>>> indicator = MissingIndicator()\n>>> indicator.fit(X1)\nMissingIndicator()\n>>> X2_tr = indicator.transform(X2)\n>>> X2_tr\narray([[False, True],\n [ True, False],\n [False, False]])" - } - ], - "functions": [ - { - "name": "_check_inputs_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_most_frequent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the most frequent value in a 1d array extended with\n[extra_value] * n_repeat, where extra_value is assumed to be not part\nof the array." - } - ] - }, - { - "name": "sklearn.impute._iterative", - "imports": [ - "from time import time", - "from collections import namedtuple", - "import warnings", - "from scipy import stats", - "import numpy as np", - "from base import clone", - "from exceptions import ConvergenceWarning", - "from preprocessing import normalize", - "from utils import check_array", - "from utils import check_random_state", - "from utils import _safe_indexing", - "from utils import is_scalar_nan", - "from utils.validation import FLOAT_DTYPES", - "from utils.validation import check_is_fitted", - "from utils._mask import _get_mask", - "from _base import _BaseImputer", - "from _base import SimpleImputer", - "from _base import _check_inputs_dtype", - "from linear_model import BayesianRidge" - ], - "classes": [ - { - "name": "IterativeImputer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator to use at each step of the round-robin imputation. If ``sample_posterior`` is True, the estimator must support ``return_std`` in its ``predict`` method." 
- }, - { - "name": "missing_values", - "type": "int", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "The placeholder for the missing values. All occurrences of `missing_values` will be imputed. For pandas' dataframes with nullable integer dtypes with missing values, `missing_values` should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`." - }, - { - "name": "sample_posterior", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to sample from the (Gaussian) predictive posterior of the fitted estimator for each imputation. Estimator must support ``return_std`` in its ``predict`` method if set to ``True``. Set to ``True`` if using ``IterativeImputer`` for multiple imputations." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of imputation rounds to perform before returning the imputations computed during the final round. A round is a single imputation of each feature with missing values. The stopping criterion is met once `max(abs(X_t - X_{t-1}))/max(abs(X[known_vals]))` < tol, where `X_t` is `X` at iteration `t. Note that early stopping is only applied if ``sample_posterior=False``." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Tolerance of the stopping condition." - }, - { - "name": "n_nearest_features", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of other features to use to estimate the missing values of each feature column. Nearness between features is measured using the absolute correlation coefficient between each feature pair (after initial imputation). 
To ensure coverage of features throughout the imputation process, the neighbor features are not necessarily nearest, but are drawn with probability proportional to correlation for each imputed target feature. Can provide significant speed-up when the number of features is huge. If ``None``, all features will be used." - }, - { - "name": "initial_strategy", - "type": "str", - "hasDefault": true, - "default": "'mean'", - "limitation": null, - "ignored": false, - "docstring": "Which strategy to use to initialize the missing values. Same as the ``strategy`` parameter in :class:`~sklearn.impute.SimpleImputer` Valid values: {\"mean\", \"median\", \"most_frequent\", or \"constant\"}." - }, - { - "name": "imputation_order", - "type": "str", - "hasDefault": true, - "default": "'ascending'", - "limitation": null, - "ignored": false, - "docstring": "The order in which the features will be imputed. Possible values: \"ascending\" From features with fewest missing values to most. \"descending\" From features with most missing values to fewest. \"roman\" Left to right. \"arabic\" Right to left. \"random\" A random order for each round." - }, - { - "name": "skip_complete", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If ``True`` then features with missing values during ``transform`` which did not have any missing values during ``fit`` will be imputed with the initial imputation method only. Set to ``True`` if you have many features with no missing values at both ``fit`` and ``transform`` time to save compute." - }, - { - "name": "min_value", - "type": "Union[ArrayLike, float]", - "hasDefault": true, - "default": "-np", - "limitation": null, - "ignored": false, - "docstring": "Minimum possible imputed value. Broadcast to shape (n_features,) if scalar. If array-like, expects shape (n_features,), one min value for each feature. The default is `-np.inf`. .. versionchanged:: 0.23 Added support for array-like." 
- }, - { - "name": "max_value", - "type": "Union[ArrayLike, float]", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Maximum possible imputed value. Broadcast to shape (n_features,) if scalar. If array-like, expects shape (n_features,), one max value for each feature. The default is `np.inf`. .. versionchanged:: 0.23 Added support for array-like." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity flag, controls the debug messages that are issued as functions are evaluated. The higher, the more verbose. Can be 0, 1, or 2." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator to use. Randomizes selection of estimator features if n_nearest_features is not None, the ``imputation_order`` if ``random``, and the sampling from posterior if ``sample_posterior`` is True. Use an integer for determinism. See :term:`the Glossary `." - }, - { - "name": "add_indicator", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, a :class:`MissingIndicator` transform will stack onto output of the imputer's transform. This allows a predictive estimator to account for missingness despite imputation. If a feature has no missing values at fit/train time, the feature won't appear on the missing indicator even if there are missing values at transform/test time." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_impute_one_feature", - "decorators": [], - "parameters": [ - { - "name": "X_filled", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data with the most recent imputations." 
- }, - { - "name": "mask_missing_values", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data's missing indicator matrix." - }, - { - "name": "feat_idx", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the feature currently being imputed." - }, - { - "name": "neighbor_feat_idx", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of the features to be used in imputing ``feat_idx``." - }, - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator to use at this step of the round-robin imputation. If ``sample_posterior`` is True, the estimator must support ``return_std`` in its ``predict`` method. If None, it will be cloned from self._estimator." - }, - { - "name": "fit_mode", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to fit and predict with the estimator or just predict." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Impute a single feature from the others provided.\n\nThis function predicts the missing values of one of the features using\nthe current estimates of all the other features. 
The ``estimator`` must\nsupport ``return_std=True`` in its ``predict`` method for this function\nto work.\n\nParameters\n----------\nX_filled : ndarray\n Input data with the most recent imputations.\n\nmask_missing_values : ndarray\n Input data's missing indicator matrix.\n\nfeat_idx : int\n Index of the feature currently being imputed.\n\nneighbor_feat_idx : ndarray\n Indices of the features to be used in imputing ``feat_idx``.\n\nestimator : object\n The estimator to use at this step of the round-robin imputation.\n If ``sample_posterior`` is True, the estimator must support\n ``return_std`` in its ``predict`` method.\n If None, it will be cloned from self._estimator.\n\nfit_mode : boolean, default=True\n Whether to fit and predict with the estimator or just predict.\n\nReturns\n-------\nX_filled : ndarray\n Input data with ``X_filled[missing_row_mask, feat_idx]`` updated.\n\nestimator : estimator with sklearn API\n The fitted estimator used to impute\n ``X_filled[missing_row_mask, feat_idx]``." - }, - { - "name": "_get_neighbor_feat_idx", - "decorators": [], - "parameters": [ - { - "name": "n_features", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of features in ``X``." - }, - { - "name": "feat_idx", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the feature currently being imputed." - }, - { - "name": "abs_corr_mat", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Absolute correlation matrix of ``X``. The diagonal has been zeroed out and each feature has been normalized to sum to 1. Can be None." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get a list of other features to predict ``feat_idx``.\n\nIf self.n_nearest_features is less than or equal to the total\nnumber of features, then use a probability proportional to the absolute\ncorrelation between ``feat_idx`` and each other feature to randomly\nchoose a subsample of the other features (without replacement).\n\nParameters\n----------\nn_features : int\n Number of features in ``X``.\n\nfeat_idx : int\n Index of the feature currently being imputed.\n\nabs_corr_mat : ndarray, shape (n_features, n_features)\n Absolute correlation matrix of ``X``. The diagonal has been zeroed\n out and each feature has been normalized to sum to 1. Can be None.\n\nReturns\n-------\nneighbor_feat_idx : array-like\n The features to use to impute ``feat_idx``." - }, - { - "name": "_get_ordered_idx", - "decorators": [], - "parameters": [ - { - "name": "mask_missing_values", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data's missing indicator matrix, where \"n_samples\" is the number of samples and \"n_features\" is the number of features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decide in what order we will update the features.\n\nAs a homage to the MICE R package, we will have 4 main options of\nhow to order the updates, and use a random order if anything else\nis specified.\n\nAlso, this function skips features which have no missing values.\n\nParameters\n----------\nmask_missing_values : array-like, shape (n_samples, n_features)\n Input data's missing indicator matrix, where \"n_samples\" is the\n number of samples and \"n_features\" is the number of features.\n\nReturns\n-------\nordered_idx : ndarray, shape (n_features,)\n The order in which to impute the features." 
- }, - { - "name": "_get_abs_corr_mat", - "decorators": [], - "parameters": [ - { - "name": "X_filled", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data with the most recent imputations." - }, - { - "name": "tolerance", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "``abs_corr_mat`` can have nans, which will be replaced with ``tolerance``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get absolute correlation matrix between features.\n\nParameters\n----------\nX_filled : ndarray, shape (n_samples, n_features)\n Input data with the most recent imputations.\n\ntolerance : float, default=1e-6\n ``abs_corr_mat`` can have nans, which will be replaced\n with ``tolerance``.\n\nReturns\n-------\nabs_corr_mat : ndarray, shape (n_features, n_features)\n Absolute correlation matrix of ``X`` at the beginning of the\n current round. The diagonal has been zeroed out and each feature's\n absolute correlations with all others have been normalized to sum\n to 1." - }, - { - "name": "_initial_imputation", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where \"n_samples\" is the number of samples and \"n_features\" is the number of features." - }, - { - "name": "in_fit", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether function is called in fit." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform initial imputation for input X.\n\nParameters\n----------\nX : ndarray, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\nin_fit : bool, default=False\n Whether function is called in fit.\n\nReturns\n-------\nXt : ndarray, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\nX_filled : ndarray, shape (n_samples, n_features)\n Input data with the most recent imputations.\n\nmask_missing_values : ndarray, shape (n_samples, n_features)\n Input data's missing indicator matrix, where \"n_samples\" is the\n number of samples and \"n_features\" is the number of features.\n\nX_missing_mask : ndarray, shape (n_samples, n_features)\n Input data's mask matrix indicating missing datapoints, where\n \"n_samples\" is the number of samples and \"n_features\" is the\n number of features." 
- }, - { - "name": "_validate_limit", - "decorators": [], - "parameters": [ - { - "name": "limit: scalar or array-like", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The user-specified limit (i.e, min_value or max_value)" - }, - { - "name": "limit_type: string", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "n_features: Number of features in the dataset" - }, - { - "name": "\"max\" or \"min\"", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "n_features: Number of features in the dataset" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Validate the limits (min/max) of the feature values\nConverts scalar min/max limits to vectors of shape (n_features,)\n\nParameters\n----------\nlimit: scalar or array-like\n The user-specified limit (i.e, min_value or max_value)\nlimit_type: string, \"max\" or \"min\"\n n_features: Number of features in the dataset\n\nReturns\n-------\nlimit: ndarray, shape(n_features,)\n Array of limits, one for each feature" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where \"n_samples\" is the number of samples and \"n_features\" is the number of features." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fits the imputer on X and return the transformed X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\ny : ignored.\n\nReturns\n-------\nXt : array-like, shape (n_samples, n_features)\n The imputed input data." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data to complete." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Imputes all missing values in X.\n\nNote that this is stochastic, and that if random_state is not fixed,\nrepeated calls, or permuted input, will yield different results.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data to complete.\n\nReturns\n-------\nXt : array-like, shape (n_samples, n_features)\n The imputed input data." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where \"n_samples\" is the number of samples and \"n_features\" is the number of features." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fits the imputer on X and return self.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\ny : ignored\n\nReturns\n-------\nself : object\n Returns self." - } - ], - "docstring": "Multivariate imputer that estimates each feature from all the others.\n\nA strategy for imputing missing values by modeling each feature with\nmissing values as a function of other features in a round-robin fashion.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_iterative_imputer``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_iterative_imputer # noqa\n >>> # now you can import normally from sklearn.impute\n >>> from sklearn.impute import IterativeImputer\n\nParameters\n----------\nestimator : estimator object, default=BayesianRidge()\n The estimator to use at each step of the round-robin imputation.\n If ``sample_posterior`` is True, the estimator must support\n ``return_std`` in its ``predict`` method.\n\nmissing_values : int, np.nan, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\nsample_posterior : boolean, default=False\n Whether to sample from the (Gaussian) predictive posterior of the\n fitted estimator for each imputation. 
Estimator must support\n ``return_std`` in its ``predict`` method if set to ``True``. Set to\n ``True`` if using ``IterativeImputer`` for multiple imputations.\n\nmax_iter : int, default=10\n Maximum number of imputation rounds to perform before returning the\n imputations computed during the final round. A round is a single\n imputation of each feature with missing values. The stopping criterion\n is met once `max(abs(X_t - X_{t-1}))/max(abs(X[known_vals]))` < tol,\n where `X_t` is `X` at iteration `t. Note that early stopping is only\n applied if ``sample_posterior=False``.\n\ntol : float, default=1e-3\n Tolerance of the stopping condition.\n\nn_nearest_features : int, default=None\n Number of other features to use to estimate the missing values of\n each feature column. Nearness between features is measured using\n the absolute correlation coefficient between each feature pair (after\n initial imputation). To ensure coverage of features throughout the\n imputation process, the neighbor features are not necessarily nearest,\n but are drawn with probability proportional to correlation for each\n imputed target feature. Can provide significant speed-up when the\n number of features is huge. If ``None``, all features will be used.\n\ninitial_strategy : str, default='mean'\n Which strategy to use to initialize the missing values. Same as the\n ``strategy`` parameter in :class:`~sklearn.impute.SimpleImputer`\n Valid values: {\"mean\", \"median\", \"most_frequent\", or \"constant\"}.\n\nimputation_order : str, default='ascending'\n The order in which the features will be imputed. 
Possible values:\n\n \"ascending\"\n From features with fewest missing values to most.\n \"descending\"\n From features with most missing values to fewest.\n \"roman\"\n Left to right.\n \"arabic\"\n Right to left.\n \"random\"\n A random order for each round.\n\nskip_complete : boolean, default=False\n If ``True`` then features with missing values during ``transform``\n which did not have any missing values during ``fit`` will be imputed\n with the initial imputation method only. Set to ``True`` if you have\n many features with no missing values at both ``fit`` and ``transform``\n time to save compute.\n\nmin_value : float or array-like of shape (n_features,), default=-np.inf\n Minimum possible imputed value. Broadcast to shape (n_features,) if\n scalar. If array-like, expects shape (n_features,), one min value for\n each feature. The default is `-np.inf`.\n\n .. versionchanged:: 0.23\n Added support for array-like.\n\nmax_value : float or array-like of shape (n_features,), default=np.inf\n Maximum possible imputed value. Broadcast to shape (n_features,) if\n scalar. If array-like, expects shape (n_features,), one max value for\n each feature. The default is `np.inf`.\n\n .. versionchanged:: 0.23\n Added support for array-like.\n\nverbose : int, default=0\n Verbosity flag, controls the debug messages that are issued\n as functions are evaluated. The higher, the more verbose. Can be 0, 1,\n or 2.\n\nrandom_state : int, RandomState instance or None, default=None\n The seed of the pseudo random number generator to use. Randomizes\n selection of estimator features if n_nearest_features is not None, the\n ``imputation_order`` if ``random``, and the sampling from posterior if\n ``sample_posterior`` is True. Use an integer for determinism.\n See :term:`the Glossary `.\n\nadd_indicator : boolean, default=False\n If True, a :class:`MissingIndicator` transform will stack onto output\n of the imputer's transform. 
This allows a predictive estimator\n to account for missingness despite imputation. If a feature has no\n missing values at fit/train time, the feature won't appear on\n the missing indicator even if there are missing values at\n transform/test time.\n\nAttributes\n----------\ninitial_imputer_ : object of type :class:`~sklearn.impute.SimpleImputer`\n Imputer used to initialize the missing values.\n\nimputation_sequence_ : list of tuples\n Each tuple has ``(feat_idx, neighbor_feat_idx, estimator)``, where\n ``feat_idx`` is the current feature to be imputed,\n ``neighbor_feat_idx`` is the array of other features used to impute the\n current feature, and ``estimator`` is the trained estimator used for\n the imputation. Length is ``self.n_features_with_missing_ *\n self.n_iter_``.\n\nn_iter_ : int\n Number of iteration rounds that occurred. Will be less than\n ``self.max_iter`` if early stopping criterion was reached.\n\nn_features_with_missing_ : int\n Number of features with missing values.\n\nindicator_ : :class:`~sklearn.impute.MissingIndicator`\n Indicator used to add binary indicators for missing values.\n ``None`` if add_indicator is False.\n\nrandom_state_ : RandomState instance\n RandomState instance that is generated either from a seed, the random\n number generator or by `np.random`.\n\nSee Also\n--------\nSimpleImputer : Univariate imputation of missing values.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.experimental import enable_iterative_imputer\n>>> from sklearn.impute import IterativeImputer\n>>> imp_mean = IterativeImputer(random_state=0)\n>>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\nIterativeImputer(random_state=0)\n>>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n>>> imp_mean.transform(X)\narray([[ 6.9584..., 2. , 3. ],\n [ 4. , 2.6000..., 6. ],\n [10. , 4.9999..., 9. 
]])\n\nNotes\n-----\nTo support imputation in inductive mode we store each feature's estimator\nduring the ``fit`` phase, and predict without refitting (in order) during\nthe ``transform`` phase.\n\nFeatures which contain all missing values at ``fit`` are discarded upon\n``transform``.\n\nReferences\n----------\n.. [1] `Stef van Buuren, Karin Groothuis-Oudshoorn (2011). \"mice:\n Multivariate Imputation by Chained Equations in R\". Journal of\n Statistical Software 45: 1-67.\n `_\n\n.. [2] `S. F. Buck, (1960). \"A Method of Estimation of Missing Values in\n Multivariate Data Suitable for use with an Electronic Computer\".\n Journal of the Royal Statistical Society 22(2): 302-306.\n `_" - } - ], - "functions": [] - }, - { - "name": "sklearn.impute._knn", - "imports": [ - "import numpy as np", - "from _base import _BaseImputer", - "from utils.validation import FLOAT_DTYPES", - "from metrics import pairwise_distances_chunked", - "from metrics.pairwise import _NAN_METRICS", - "from neighbors._base import _get_weights", - "from neighbors._base import _check_weights", - "from utils import is_scalar_nan", - "from utils._mask import _get_mask", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "KNNImputer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "missing_values", - "type": "Optional[Union[float, int, str]]", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "The placeholder for the missing values. All occurrences of `missing_values` will be imputed. For pandas' dataframes with nullable integer dtypes with missing values, `missing_values` should be set to np.nan, since `pd.NA` will be converted to np.nan." 
- }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of neighboring samples to use for imputation." - }, - { - "name": "weights", - "type": "Literal['uniform', 'distance']", - "hasDefault": true, - "default": "'uniform'", - "limitation": null, - "ignored": false, - "docstring": "Weight function used in prediction. Possible values: - 'uniform' : uniform weights. All points in each neighborhood are weighted equally. - 'distance' : weight points by the inverse of their distance. in this case, closer neighbors of a query point will have a greater influence than neighbors which are further away. - callable : a user-defined function which accepts an array of distances, and returns an array of the same shape containing the weights." - }, - { - "name": "metric", - "type": "Literal['nan_euclidean']", - "hasDefault": true, - "default": "'nan_euclidean'", - "limitation": null, - "ignored": false, - "docstring": "Distance metric for searching neighbors. Possible values: - 'nan_euclidean' - callable : a user-defined function which conforms to the definition of ``_pairwise_callable(X, Y, metric, **kwds)``. The function accepts two arrays, X and Y, and a `missing_values` keyword in `kwds` and returns a scalar distance value." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, a copy of X will be created. If False, imputation will be done in-place whenever possible." - }, - { - "name": "add_indicator", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, a :class:`MissingIndicator` transform will stack onto the output of the imputer's transform. This allows a predictive estimator to account for missingness despite imputation. 
If a feature has no missing values at fit/train time, the feature won't appear on the missing indicator even if there are missing values at transform/test time." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_calc_impute", - "decorators": [], - "parameters": [ - { - "name": "dist_pot_donors", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Distance matrix between the receivers and potential donors from training set. There must be at least one non-nan distance between a receiver and a potential donor." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to consider." - }, - { - "name": "fit_X_col", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Column of potential donors from training set." - }, - { - "name": "mask_fit_X_col", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Missing mask for fit_X_col." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Helper function to impute a single column.\n\nParameters\n----------\ndist_pot_donors : ndarray of shape (n_receivers, n_potential_donors)\n Distance matrix between the receivers and potential donors from\n training set. There must be at least one non-nan distance between\n a receiver and a potential donor.\n\nn_neighbors : int\n Number of neighbors to consider.\n\nfit_X_col : ndarray of shape (n_potential_donors,)\n Column of potential donors from training set.\n\nmask_fit_X_col : ndarray of shape (n_potential_donors,)\n Missing mask for fit_X_col.\n\nReturns\n-------\nimputed_values: ndarray of shape (n_receivers,)\n Imputed values for receiver." 
- }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where `n_samples` is the number of samples and `n_features` is the number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the imputer on X.\n\nParameters\n----------\nX : array-like shape of (n_samples, n_features)\n Input data, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\nReturns\n-------\nself : object" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data to complete." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Impute all missing values in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data to complete.\n\nReturns\n-------\nX : array-like of shape (n_samples, n_output_features)\n The imputed dataset. `n_output_features` is the number of features\n that is not always missing during `fit`." - } - ], - "docstring": "Imputation for completing missing values using k-Nearest Neighbors.\n\nEach sample's missing values are imputed using the mean value from\n`n_neighbors` nearest neighbors found in the training set. Two samples are\nclose if the features that neither is missing are close.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nmissing_values : int, float, str, np.nan or None, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. 
For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n should be set to np.nan, since `pd.NA` will be converted to np.nan.\n\nn_neighbors : int, default=5\n Number of neighboring samples to use for imputation.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n Weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. All points in each neighborhood are\n weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - callable : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\nmetric : {'nan_euclidean'} or callable, default='nan_euclidean'\n Distance metric for searching neighbors. Possible values:\n\n - 'nan_euclidean'\n - callable : a user-defined function which conforms to the definition\n of ``_pairwise_callable(X, Y, metric, **kwds)``. The function\n accepts two arrays, X and Y, and a `missing_values` keyword in\n `kwds` and returns a scalar distance value.\n\ncopy : bool, default=True\n If True, a copy of X will be created. If False, imputation will\n be done in-place whenever possible.\n\nadd_indicator : bool, default=False\n If True, a :class:`MissingIndicator` transform will stack onto the\n output of the imputer's transform. This allows a predictive estimator\n to account for missingness despite imputation. 
If a feature has no\n missing values at fit/train time, the feature won't appear on the\n missing indicator even if there are missing values at transform/test\n time.\n\nAttributes\n----------\nindicator_ : :class:`~sklearn.impute.MissingIndicator`\n Indicator used to add binary indicators for missing values.\n ``None`` if add_indicator is False.\n\nReferences\n----------\n* Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, Trevor\n Hastie, Robert Tibshirani, David Botstein and Russ B. Altman, Missing\n value estimation methods for DNA microarrays, BIOINFORMATICS Vol. 17\n no. 6, 2001 Pages 520-525.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.impute import KNNImputer\n>>> X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]\n>>> imputer = KNNImputer(n_neighbors=2)\n>>> imputer.fit_transform(X)\narray([[1. , 2. , 4. ],\n [3. , 4. , 3. ],\n [5.5, 6. , 5. ],\n [8. , 8. , 7. ]])" - } - ], - "functions": [] - }, - { - "name": "sklearn.impute", - "imports": [ - "import typing", - "from _base import MissingIndicator", - "from _base import SimpleImputer", - "from _knn import KNNImputer", - "from _iterative import IterativeImputer" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.impute.tests.test_base", - "imports": [ - "import pytest", - "import numpy as np", - "from sklearn.impute._base import _BaseImputer", - "from sklearn.utils._mask import _get_mask" - ], - "classes": [ - { - "name": "NoFitIndicatorImputer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": 
"NoTransformIndicatorImputer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoPrecomputedMaskFit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoPrecomputedMaskTransform", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_imputer_not_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_imputer_not_transform", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_no_precomputed_mask_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_no_precomputed_mask_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.impute.tests.test_common", - "imports": [ - "import pytest", - "import numpy as np", - "from scipy import sparse", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.experimental import enable_iterative_imputer", - "from sklearn.impute import IterativeImputer", - "from sklearn.impute import KNNImputer", - "from sklearn.impute import SimpleImputer" - ], - "classes": [], - "functions": [ - { - "name": "test_imputation_missing_value_in_test_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputers_add_indicator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputers_add_indicator_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputers_pandas_na_integer_array_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.impute.tests.test_impute", - "imports": [ - "from __future__ import division", - "import pytest", - "import numpy as np", - "from scipy import sparse", - "from scipy.stats import kstest", - "import io", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing 
import assert_allclose_dense_sparse", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.experimental import enable_iterative_imputer", - "from sklearn.datasets import load_diabetes", - "from sklearn.impute import MissingIndicator", - "from sklearn.impute import SimpleImputer", - "from sklearn.impute import IterativeImputer", - "from sklearn.dummy import DummyRegressor", - "from sklearn.linear_model import BayesianRidge", - "from sklearn.linear_model import ARDRegression", - "from sklearn.linear_model import RidgeCV", - "from sklearn.pipeline import Pipeline", - "from sklearn.pipeline import make_union", - "from sklearn.model_selection import GridSearchCV", - "from sklearn import tree", - "from sklearn.random_projection import _sparse_random_matrix", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.impute._base import _most_frequent" - ], - "classes": [], - "functions": [ - { - "name": "_check_statistics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Utility function for testing imputation for a given strategy.\n\nTest with dense and sparse arrays\n\nCheck that:\n - the statistics (mean, median, mode) are correct\n - the missing values are imputed correctly" - }, - { - "name": "test_imputation_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_error_invalid_strategy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_deletion_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_error_sparse_0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - 
}, - { - "name": "safe_median", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "safe_mean", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_mean_median", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_median_special_cases", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_mean_median_error_invalid_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_mean_median_error_invalid_type_list_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_const_mostf_error_invalid_types", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_most_frequent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_most_frequent_objects", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_most_frequent_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_constant_error_invalid_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_constant_integer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - 
{ - "name": "test_imputation_constant_float", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_constant_object", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_constant_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_one_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_pipeline_grid_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_copy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_zero_iters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_all_missing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_imputation_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_clip", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_iterative_imputer_clip_truncnorm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_truncated_normal_posterior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_missing_at_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_transform_stochasticity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_no_missing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_rank_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_transform_recovery", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_additive_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_error_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_catch_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_min_max_array_like", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_catch_min_max_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_min_max_array_like_imputation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_skip_non_missing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iterative_imputer_dont_set_random_state", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_indicator_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_indicator_new", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_indicator_raise_on_sparse_with_missing_0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_indicator_sparse_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_indicator_string", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_indicator_with_imputer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inconsistent_dtype_X_missing_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_indicator_no_missing", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_indicator_sparse_no_explicit_zeros", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputer_without_indicator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_simple_imputation_add_indicator_sparse_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_simple_imputation_string_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_imputation_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_simple_imputation_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_simple_imputation_inverse_transform_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_most_frequent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.impute.tests.test_knn", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn import config_context", - "from sklearn.impute import KNNImputer", - "from sklearn.metrics.pairwise import nan_euclidean_distances", - "from sklearn.metrics.pairwise import pairwise_distances", - "from sklearn.neighbors import KNeighborsRegressor", - "from sklearn.utils._testing import assert_allclose" - ], - "classes": [], - "functions": [ - { - "name": "test_knn_imputer_shape", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_default_with_invalid_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_removes_all_na_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_zero_nan_imputes_the_same", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_verify", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_one_n_neighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_all_samples_are_neighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_weight_uniform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_weight_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_callable_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_with_simple_example", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_not_enough_valid_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_drops_all_nan_features", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_imputer_distance_weighted_not_enough_neighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.impute.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.inspection.setup", - "imports": [ - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.inspection._partial_dependence", - "imports": [ - "from collections.abc import Iterable", - "import warnings", - "import numpy as np", - "from scipy import sparse", - "from scipy.stats.mstats import mquantiles", - "from base import is_classifier", - "from base import is_regressor", - "from pipeline import Pipeline", - "from utils.extmath import cartesian", - "from utils import check_array", - "from utils import check_matplotlib_support", - "from utils import _safe_indexing", - "from utils import _determine_key_type", - "from utils import _get_column_indices", - "from utils.validation import check_is_fitted", - "from utils import Bunch", - "from utils.validation import _deprecate_positional_args", - "from tree import DecisionTreeRegressor", - "from ensemble import RandomForestRegressor", - "from exceptions import NotFittedError", - "from ensemble._gb import BaseGradientBoosting", - "from sklearn.ensemble._hist_gradient_boosting.gradient_boosting import BaseHistGradientBoosting" - ], - "classes": [], - "functions": [ - { - "name": "_grid_from_X", - "decorators": [], - "parameters": [ - 
{ - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - }, - { - "name": "percentiles", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The percentiles which are used to construct the extreme values of the grid. Must be in [0, 1]." - }, - { - "name": "grid_resolution", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of equally spaced points to be placed on the grid for each feature." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a grid of points based on the percentiles of X.\n\nThe grid is a cartesian product between the columns of ``values``. The\nith column of ``values`` consists in ``grid_resolution`` equally-spaced\npoints between the percentiles of the jth column of X.\nIf ``grid_resolution`` is bigger than the number of unique values in the\njth column of X, then those unique values will be used instead.\n\nParameters\n----------\nX : ndarray, shape (n_samples, n_target_features)\n The data.\n\npercentiles : tuple of floats\n The percentiles which are used to construct the extreme values of\n the grid. Must be in [0, 1].\n\ngrid_resolution : int\n The number of equally spaced points to be placed on the grid for each\n feature.\n\nReturns\n-------\ngrid : ndarray, shape (n_points, n_target_features)\n A value for each feature at each point in the grid. ``n_points`` is\n always ``<= grid_resolution ** X.shape[1]``.\n\nvalues : list of 1d ndarrays\n The values with which the grid has been created. The size of each\n array ``values[j]`` is either ``grid_resolution``, or the number of\n unique values in ``X[:, j]``, whichever is smaller." 
- }, - { - "name": "_partial_dependence_recursion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_partial_dependence_brute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_dependence", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A fitted estimator object implementing :term:`predict`, :term:`predict_proba`, or :term:`decision_function`. Multioutput-multiclass classifiers are not supported." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "``X`` is used to generate a grid of values for the target ``features`` (where the partial dependence will be evaluated), and also to generate values for the complement features when the `method` is 'brute'." - }, - { - "name": "features", - "type": "Union[str, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The feature (e.g. `[0]`) or pair of interacting features (e.g. `[(0, 1)]`) for which the partial dependency should be computed." - }, - { - "name": "response_method", - "type": "Literal['auto', 'predict_proba', 'decision_function']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. For regressors this parameter is ignored and the response is always the output of :term:`predict`. By default, :term:`predict_proba` is tried first and we revert to :term:`decision_function` if it doesn't exist. If ``method`` is 'recursion', the response is always the output of :term:`decision_function`." 
- }, - { - "name": "percentiles", - "type": "Tuple[float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper percentile used to create the extreme values for the grid. Must be in [0, 1]." - }, - { - "name": "grid_resolution", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of equally spaced points on the grid, for each target feature." - }, - { - "name": "method", - "type": "Literal['auto', 'recursion', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The method used to calculate the averaged predictions: - `'recursion'` is only supported for some tree-based estimators (namely :class:`~sklearn.ensemble.GradientBoostingClassifier`, :class:`~sklearn.ensemble.GradientBoostingRegressor`, :class:`~sklearn.ensemble.HistGradientBoostingClassifier`, :class:`~sklearn.ensemble.HistGradientBoostingRegressor`, :class:`~sklearn.tree.DecisionTreeRegressor`, :class:`~sklearn.ensemble.RandomForestRegressor`, ) when `kind='average'`. This is more efficient in terms of speed. With this method, the target response of a classifier is always the decision function, not the predicted probabilities. Since the `'recursion'` method implicitely computes the average of the Individual Conditional Expectation (ICE) by design, it is not compatible with ICE and thus `kind` must be `'average'`. - `'brute'` is supported for any estimator, but is more computationally intensive. - `'auto'`: the `'recursion'` is used for estimators that support it, and `'brute'` is used otherwise. Please see :ref:`this note ` for differences between the `'brute'` and `'recursion'` method." 
- }, - { - "name": "kind", - "type": "Literal['legacy', 'average', 'individual', 'both']", - "hasDefault": true, - "default": "'legacy'", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the partial dependence averaged across all the samples in the dataset or one line per sample or both. See Returns below. Note that the fast `method='recursion'` option is only available for `kind='average'`. Plotting individual dependencies requires using the slower `method='brute'` option. .. versionadded:: 0.24 .. deprecated:: 0.24 `kind='legacy'` is deprecated and will be removed in version 1.1. `kind='average'` will be the new default. It is intended to migrate from the ndarray output to :class:`~sklearn.utils.Bunch` output." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Partial dependence of ``features``.\n\nPartial dependence of a feature (or a set of features) corresponds to\nthe average response of an estimator for each possible value of the\nfeature.\n\nRead more in the :ref:`User Guide `.\n\n.. warning::\n\n For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n `'recursion'` method (used by default) will not account for the `init`\n predictor of the boosting process. In practice, this will produce\n the same values as `'brute'` up to a constant offset in the target\n response, provided that `init` is a constant estimator (which is the\n default). However, if `init` is not a constant estimator, the\n partial dependence values are incorrect for `'recursion'` because the\n offset will be sample-dependent. It is preferable to use the `'brute'`\n method. 
Note that this only applies to\n :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\nParameters\n----------\nestimator : BaseEstimator\n A fitted estimator object implementing :term:`predict`,\n :term:`predict_proba`, or :term:`decision_function`.\n Multioutput-multiclass classifiers are not supported.\n\nX : {array-like or dataframe} of shape (n_samples, n_features)\n ``X`` is used to generate a grid of values for the target\n ``features`` (where the partial dependence will be evaluated), and\n also to generate values for the complement features when the\n `method` is 'brute'.\n\nfeatures : array-like of {int, str}\n The feature (e.g. `[0]`) or pair of interacting features\n (e.g. `[(0, 1)]`) for which the partial dependency should be computed.\n\nresponse_method : {'auto', 'predict_proba', 'decision_function'}, default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. For regressors\n this parameter is ignored and the response is always the output of\n :term:`predict`. By default, :term:`predict_proba` is tried first\n and we revert to :term:`decision_function` if it doesn't exist. If\n ``method`` is 'recursion', the response is always the output of\n :term:`decision_function`.\n\npercentiles : tuple of float, default=(0.05, 0.95)\n The lower and upper percentile used to create the extreme values\n for the grid. 
Must be in [0, 1].\n\ngrid_resolution : int, default=100\n The number of equally spaced points on the grid, for each target\n feature.\n\nmethod : {'auto', 'recursion', 'brute'}, default='auto'\n The method used to calculate the averaged predictions:\n\n - `'recursion'` is only supported for some tree-based estimators\n (namely\n :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n :class:`~sklearn.tree.DecisionTreeRegressor`,\n :class:`~sklearn.ensemble.RandomForestRegressor`,\n ) when `kind='average'`.\n This is more efficient in terms of speed.\n With this method, the target response of a\n classifier is always the decision function, not the predicted\n probabilities. Since the `'recursion'` method implicitely computes\n the average of the Individual Conditional Expectation (ICE) by\n design, it is not compatible with ICE and thus `kind` must be\n `'average'`.\n\n - `'brute'` is supported for any estimator, but is more\n computationally intensive.\n\n - `'auto'`: the `'recursion'` is used for estimators that support it,\n and `'brute'` is used otherwise.\n\n Please see :ref:`this note ` for\n differences between the `'brute'` and `'recursion'` method.\n\nkind : {'legacy', 'average', 'individual', 'both'}, default='legacy'\n Whether to return the partial dependence averaged across all the\n samples in the dataset or one line per sample or both.\n See Returns below.\n\n Note that the fast `method='recursion'` option is only available for\n `kind='average'`. Plotting individual dependencies requires using the\n slower `method='brute'` option.\n\n .. versionadded:: 0.24\n .. deprecated:: 0.24\n `kind='legacy'` is deprecated and will be removed in version 1.1.\n `kind='average'` will be the new default. 
It is intended to migrate\n from the ndarray output to :class:`~sklearn.utils.Bunch` output.\n\n\nReturns\n-------\npredictions : ndarray or :class:`~sklearn.utils.Bunch`\n\n - if `kind='legacy'`, return value is ndarray of shape (n_outputs, len(values[0]), len(values[1]), ...)\n The predictions for all the points in the grid, averaged\n over all samples in X (or over the training data if ``method``\n is 'recursion').\n\n - if `kind='individual'`, `'average'` or `'both'`, return value is :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n individual : ndarray of shape (n_outputs, n_instances, len(values[0]), len(values[1]), ...)\n The predictions for all the points in the grid for all\n samples in X. This is also known as Individual\n Conditional Expectation (ICE)\n\n average : ndarray of shape (n_outputs, len(values[0]), len(values[1]), ...)\n The predictions for all the points in the grid, averaged\n over all samples in X (or over the training data if\n ``method`` is 'recursion').\n Only available when kind='both'.\n\n values : seq of 1d ndarrays\n The values with which the grid has been created. The generated\n grid is a cartesian product of the arrays in ``values``.\n ``len(values) == len(features)``. The size of each array\n ``values[j]`` is either ``grid_resolution``, or the number of\n unique values in ``X[:, j]``, whichever is smaller.\n\n ``n_outputs`` corresponds to the number of classes in a multi-class\n setting, or to the number of tasks for multi-output regression.\n For classical regression and binary classification ``n_outputs==1``.\n ``n_values_feature_j`` corresponds to the size ``values[j]``.\n\nvalues : seq of 1d ndarrays\n The values with which the grid has been created. The generated grid\n is a cartesian product of the arrays in ``values``. ``len(values) ==\n len(features)``. 
The size of each array ``values[j]`` is either\n ``grid_resolution``, or the number of unique values in ``X[:, j]``,\n whichever is smaller. Only available when `kind=\"legacy\"`.\n\nSee Also\n--------\nplot_partial_dependence : Plot Partial Dependence.\nPartialDependenceDisplay : Partial Dependence visualization.\n\nExamples\n--------\n>>> X = [[0, 0, 2], [1, 0, 0]]\n>>> y = [0, 1]\n>>> from sklearn.ensemble import GradientBoostingClassifier\n>>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)\n>>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),\n... grid_resolution=2) # doctest: +SKIP\n(array([[-4.52..., 4.52...]]), [array([ 0., 1.])])" - } - ] - }, - { - "name": "sklearn.inspection._permutation_importance", - "imports": [ - "import numpy as np", - "from joblib import Parallel", - "from metrics import check_scoring", - "from utils import Bunch", - "from utils import check_random_state", - "from utils import check_array", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed" - ], - "classes": [], - "functions": [ - { - "name": "_weights_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_calculate_permutation_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate score when `col_idx` is permuted." - }, - { - "name": "permutation_importance", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator that has already been :term:`fitted` and is compatible with :term:`scorer`." - }, - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data on which permutation importance will be computed." 
- }, - { - "name": "y", - "type": "Optional[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets for supervised or `None` for unsupervised." - }, - { - "name": "scoring", - "type": "Optional[Union[Callable, str]]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Scorer to use. It can be a single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`). If None, the estimator's default scorer is used." - }, - { - "name": "n_repeats", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of times to permute a feature." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. The computation is done by computing permutation score for each columns and parallelized over the columns. `None` means 1 unless in a :obj:`joblib.parallel_backend` context. `-1` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo-random number generator to control the permutations of each feature. Pass an int to get reproducible results across function calls. See :term: `Glossary `." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights used in scoring. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Permutation importance for feature evaluation [BRE]_.\n\nThe :term:`estimator` is required to be a fitted estimator. `X` can be the\ndata set used to train the estimator or a hold-out set. 
The permutation\nimportance of a feature is calculated as follows. First, a baseline metric,\ndefined by :term:`scoring`, is evaluated on a (potentially different)\ndataset defined by the `X`. Next, a feature column from the validation set\nis permuted and the metric is evaluated again. The permutation importance\nis defined to be the difference between the baseline metric and metric from\npermutating the feature column.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : object\n An estimator that has already been :term:`fitted` and is compatible\n with :term:`scorer`.\n\nX : ndarray or DataFrame, shape (n_samples, n_features)\n Data on which permutation importance will be computed.\n\ny : array-like or None, shape (n_samples, ) or (n_samples, n_classes)\n Targets for supervised or `None` for unsupervised.\n\nscoring : string, callable or None, default=None\n Scorer to use. It can be a single\n string (see :ref:`scoring_parameter`) or a callable (see\n :ref:`scoring`). If None, the estimator's default scorer is used.\n\nn_repeats : int, default=5\n Number of times to permute a feature.\n\nn_jobs : int or None, default=None\n Number of jobs to run in parallel. The computation is done by computing\n permutation score for each columns and parallelized over the columns.\n `None` means 1 unless in a :obj:`joblib.parallel_backend` context.\n `-1` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n Pseudo-random number generator to control the permutations of each\n feature.\n Pass an int to get reproducible results across function calls.\n See :term: `Glossary `.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights used in scoring.\n\n .. 
versionadded:: 0.24\n\nReturns\n-------\nresult : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n importances_mean : ndarray, shape (n_features, )\n Mean of feature importance over `n_repeats`.\n importances_std : ndarray, shape (n_features, )\n Standard deviation over `n_repeats`.\n importances : ndarray, shape (n_features, n_repeats)\n Raw permutation importance scores.\n\nReferences\n----------\n.. [BRE] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32,\n 2001. https://doi.org/10.1023/A:1010933404324\n\nExamples\n--------\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.inspection import permutation_importance\n>>> X = [[1, 9, 9],[1, 9, 9],[1, 9, 9],\n... [0, 9, 9],[0, 9, 9],[0, 9, 9]]\n>>> y = [1, 1, 1, 0, 0, 0]\n>>> clf = LogisticRegression().fit(X, y)\n>>> result = permutation_importance(clf, X, y, n_repeats=10,\n... random_state=0)\n>>> result.importances_mean\narray([0.4666..., 0. , 0. ])\n>>> result.importances_std\narray([0.2211..., 0. , 0. 
])" - } - ] - }, - { - "name": "sklearn.inspection", - "imports": [ - "from _permutation_importance import permutation_importance", - "from _partial_dependence import partial_dependence", - "from _plot.partial_dependence import plot_partial_dependence", - "from _plot.partial_dependence import PartialDependenceDisplay" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.inspection.tests.test_partial_dependence", - "imports": [ - "import numpy as np", - "import pytest", - "import sklearn", - "from sklearn.inspection import partial_dependence", - "from sklearn.inspection._partial_dependence import _grid_from_X", - "from sklearn.inspection._partial_dependence import _partial_dependence_brute", - "from sklearn.inspection._partial_dependence import _partial_dependence_recursion", - "from sklearn.ensemble import GradientBoostingClassifier", - "from sklearn.ensemble import GradientBoostingRegressor", - "from sklearn.ensemble import RandomForestRegressor", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from sklearn.ensemble import HistGradientBoostingClassifier", - "from sklearn.ensemble import HistGradientBoostingRegressor", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import MultiTaskLasso", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.cluster import KMeans", - "from sklearn.compose import make_column_transformer", - "from sklearn.metrics import r2_score", - "from sklearn.preprocessing import PolynomialFeatures", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.preprocessing import RobustScaler", - "from sklearn.pipeline import make_pipeline", - "from sklearn.dummy import DummyClassifier", - "from sklearn.base import BaseEstimator", - "from 
sklearn.base import ClassifierMixin", - "from sklearn.base import clone", - "from sklearn.exceptions import NotFittedError", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils import _IS_32BIT", - "from sklearn.utils.validation import check_random_state", - "from sklearn.tree.tests.test_tree import assert_is_subtree" - ], - "classes": [ - { - "name": "NoPredictProbaNoDecisionFunction", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_output_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_from_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_from_X_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_helpers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_recursion_decision_tree_vs_forest_and_gbdt", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_recursion_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_easy_target", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_multiclass_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_slice_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_unknown_feature_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_unknown_feature_string", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_X_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warning_recursion_non_constant_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hist_gbdt_sw_not_supported", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_feature_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "test_partial_dependence_unfitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kind_average_and_average_of_individual", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warning_for_kind_legacy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.inspection.tests.test_permutation_importance", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_allclose", - "from sklearn.compose import ColumnTransformer", - "from sklearn.datasets import load_diabetes", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.dummy import DummyClassifier", - "from sklearn.ensemble import RandomForestRegressor", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.impute import SimpleImputer", - "from sklearn.inspection import permutation_importance", - "from sklearn.model_selection import train_test_split", - "from sklearn.pipeline import make_pipeline", - "from sklearn.preprocessing import KBinsDiscretizer", - "from sklearn.preprocessing import OneHotEncoder", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.preprocessing import scale", - "from sklearn.utils import parallel_backend", - "from sklearn.utils._testing import _convert_container" - ], - "classes": [], - "functions": [ - { - "name": "test_permutation_importance_correlated_feature_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_permutation_importance_correlated_feature_regression_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robustness_to_high_cardinality_noisy_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_importance_mixed_types", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_importance_mixed_types_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_importance_linear_regresssion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_importance_equivalence_sequential_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_importance_equivalence_array_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_importance_large_memmaped_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_importance_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_importance_no_weights_scoring_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.inspection.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.inspection._plot.partial_dependence", - "imports": [ - "import 
numbers", - "from itertools import chain", - "from math import ceil", - "import numpy as np", - "from scipy import sparse", - "from scipy.stats.mstats import mquantiles", - "from joblib import Parallel", - "from None import partial_dependence", - "from base import is_regressor", - "from utils import check_array", - "from utils import check_matplotlib_support", - "from utils import check_random_state", - "from utils import _safe_indexing", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "import matplotlib.pyplot as plt", - "from matplotlib import transforms", - "from matplotlib.gridspec import GridSpecFromSubplotSpec" - ], - "classes": [ - { - "name": "PartialDependenceDisplay", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "pd_results", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Results of :func:`~sklearn.inspection.partial_dependence` for ``features``." - }, - { - "name": "features", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of features for a given plot. A tuple of one integer will plot a partial dependence curve of one feature. A tuple of two integers will plot a two-way partial dependence curve as a contour plot." - }, - { - "name": "feature_names", - "type": "List[str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Feature names corresponding to the indices in ``features``." - }, - { - "name": "target_idx", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "- In a multiclass setting, specifies the class for which the PDPs should be computed. Note that for binary classification, the positive class (index 1) is always used. 
- In a multioutput setting, specifies the task for which the PDPs should be computed. Ignored in binary classification or classical regression settings." - }, - { - "name": "pdp_lim", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Global min and max average predictions, such that all plots will have the same scale and y limits. `pdp_lim[1]` is the global min and max for single partial dependence curves. `pdp_lim[2]` is the global min and max for two-way partial dependence curves." - }, - { - "name": "deciles", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Deciles for feature indices in ``features``." - }, - { - "name": "kind", - "type": "Literal['average', 'individual', 'both']", - "hasDefault": true, - "default": "'average'", - "limitation": null, - "ignored": false, - "docstring": " Whether to plot the partial dependence averaged across all the samples in the dataset or one line per sample or both. - ``kind='average'`` results in the traditional PD plot; - ``kind='individual'`` results in the ICE plot. Note that the fast ``method='recursion'`` option is only available for ``kind='average'``. Plotting individual dependencies requires using the slower ``method='brute'`` option. .. versionadded:: 0.24" - }, - { - "name": "subsample", - "type": "Optional[Union[int, float]]", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Sampling for ICE curves when `kind` is 'individual' or 'both'. If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to be used to plot ICE curves. If int, represents the maximum absolute number of samples to use. Note that the full dataset is still used to calculate partial dependence when `kind='both'`. .. 
versionadded:: 0.24" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of the selected samples when subsamples is not `None`. See :term:`Glossary ` for details. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_sample_count", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the number of samples as an integer." - }, - { - "name": "_plot_ice_lines", - "decorators": [], - "parameters": [ - { - "name": "preds", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The predictions computed for all points of `feature_values` for a given feature for all samples in `X`." - }, - { - "name": "feature_values", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The feature values for which the predictions have been computed." - }, - { - "name": "n_ice_to_plot", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of ICE lines to plot." - }, - { - "name": "ax", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The axis on which to plot the ICE lines." - }, - { - "name": "pd_plot_idx", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sequential index of the plot. It will be unraveled to find the matching 2D position in the grid layout." 
- }, - { - "name": "n_total_lines_by_plot", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The total number of lines expected to be plot on the axis." - }, - { - "name": "individual_line_kw", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed when plotting the ICE lines." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Plot the ICE lines.\n\nParameters\n----------\npreds : ndarray of shape (n_instances, n_grid_points)\n The predictions computed for all points of `feature_values` for a\n given feature for all samples in `X`.\nfeature_values : ndarray of shape (n_grid_points,)\n The feature values for which the predictions have been computed.\nn_ice_to_plot : int\n The number of ICE lines to plot.\nax : Matplotlib axes\n The axis on which to plot the ICE lines.\npd_plot_idx : int\n The sequential index of the plot. It will be unraveled to find the\n matching 2D position in the grid layout.\nn_total_lines_by_plot : int\n The total number of lines expected to be plot on the axis.\nindividual_line_kw : dict\n Dict with keywords passed when plotting the ICE lines." - }, - { - "name": "_plot_average_dependence", - "decorators": [], - "parameters": [ - { - "name": "avg_preds", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The average predictions for all points of `feature_values` for a given feature for all samples in `X`." - }, - { - "name": "feature_values", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The feature values for which the predictions have been computed." 
- }, - { - "name": "ax", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The axis on which to plot the ICE lines." - }, - { - "name": "pd_line_idx", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sequential index of the plot. It will be unraveled to find the matching 2D position in the grid layout." - }, - { - "name": "line_kw", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed when plotting the PD plot." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Plot the average partial dependence.\n\nParameters\n----------\navg_preds : ndarray of shape (n_grid_points,)\n The average predictions for all points of `feature_values` for a\n given feature for all samples in `X`.\nfeature_values : ndarray of shape (n_grid_points,)\n The feature values for which the predictions have been computed.\nax : Matplotlib axes\n The axis on which to plot the ICE lines.\npd_line_idx : int\n The sequential index of the plot. It will be unraveled to find the\n matching 2D position in the grid layout.\nline_kw : dict\n Dict with keywords passed when plotting the PD plot." - }, - { - "name": "_plot_one_way_partial_dependence", - "decorators": [], - "parameters": [ - { - "name": "preds", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The predictions computed for all points of `feature_values` for a given feature for all samples in `X`." - }, - { - "name": "avg_preds", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The average predictions for all points of `feature_values` for a given feature for all samples in `X`." 
- }, - { - "name": "feature_values", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The feature values for which the predictions have been computed." - }, - { - "name": "feature_idx", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index corresponding to the target feature." - }, - { - "name": "n_ice_lines", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of ICE lines to plot." - }, - { - "name": "ax", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The axis on which to plot the ICE and PDP lines." - }, - { - "name": "n_cols", - "type": "Optional[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of column in the axis." - }, - { - "name": "pd_plot_idx", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sequential index of the plot. It will be unraveled to find the matching 2D position in the grid layout." - }, - { - "name": "n_lines", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The total number of lines expected to be plot on the axis." - }, - { - "name": "individual_line_kw", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed when plotting the ICE lines." - }, - { - "name": "line_kw", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed when plotting the PD plot." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Plot 1-way partial dependence: ICE and PDP.\n\nParameters\n----------\npreds : ndarray of shape (n_instances, n_grid_points) or None\n The predictions computed for all points of `feature_values` for a\n given feature for all samples in `X`.\navg_preds : ndarray of shape (n_grid_points,)\n The average predictions for all points of `feature_values` for a\n given feature for all samples in `X`.\nfeature_values : ndarray of shape (n_grid_points,)\n The feature values for which the predictions have been computed.\nfeature_idx : int\n The index corresponding to the target feature.\nn_ice_lines : int\n The number of ICE lines to plot.\nax : Matplotlib axes\n The axis on which to plot the ICE and PDP lines.\nn_cols : int or None\n The number of column in the axis.\npd_plot_idx : int\n The sequential index of the plot. It will be unraveled to find the\n matching 2D position in the grid layout.\nn_lines : int\n The total number of lines expected to be plot on the axis.\nindividual_line_kw : dict\n Dict with keywords passed when plotting the ICE lines.\nline_kw : dict\n Dict with keywords passed when plotting the PD plot." - }, - { - "name": "_plot_two_way_partial_dependence", - "decorators": [], - "parameters": [ - { - "name": "avg_preds", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The average predictions for all points of `feature_values[0]` and `feature_values[1]` for some given features for all samples in `X`." - }, - { - "name": "feature_values", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A sequence of array of the feature values for which the predictions have been computed." 
- }, - { - "name": "feature_idx", - "type": "Tuple[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The indices of the target features" - }, - { - "name": "ax", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The axis on which to plot the ICE and PDP lines." - }, - { - "name": "pd_plot_idx", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sequential index of the plot. It will be unraveled to find the matching 2D position in the grid layout." - }, - { - "name": "Z_level", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The Z-level used to encode the average predictions." - }, - { - "name": "contour_kw", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed when plotting the contours." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Plot 2-way partial dependence.\n\nParameters\n----------\navg_preds : ndarray of shape (n_instances, n_grid_points, n_grid_points)\n The average predictions for all points of `feature_values[0]` and\n `feature_values[1]` for some given features for all samples in `X`.\nfeature_values : seq of 1d array\n A sequence of array of the feature values for which the predictions\n have been computed.\nfeature_idx : tuple of int\n The indices of the target features\nax : Matplotlib axes\n The axis on which to plot the ICE and PDP lines.\npd_plot_idx : int\n The sequential index of the plot. It will be unraveled to find the\n matching 2D position in the grid layout.\nZ_level : ndarray of shape (8, 8)\n The Z-level used to encode the average predictions.\ncontour_kw : dict\n Dict with keywords passed when plotting the contours." 
- }, - { - "name": "plot", - "decorators": [], - "parameters": [ - { - "name": "ax", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "- If a single axis is passed in, it is treated as a bounding axes and a grid of partial dependence plots will be drawn within these bounds. The `n_cols` parameter controls the number of columns in the grid. - If an array-like of axes are passed in, the partial dependence plots will be drawn directly into these axes. - If `None`, a figure and a bounding axes is created and treated as the single axes case." - }, - { - "name": "n_cols", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of columns in the grid plot. Only active when `ax` is a single axes or `None`." - }, - { - "name": "line_kw", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed to the `matplotlib.pyplot.plot` call. For one-way partial dependence plots." - }, - { - "name": "contour_kw", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed to the `matplotlib.pyplot.contourf` call for two-way partial dependence plots." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot partial dependence plots.\n\nParameters\n----------\nax : Matplotlib axes or array-like of Matplotlib axes, default=None\n - If a single axis is passed in, it is treated as a bounding axes\n and a grid of partial dependence plots will be drawn within\n these bounds. 
The `n_cols` parameter controls the number of\n columns in the grid.\n - If an array-like of axes are passed in, the partial dependence\n plots will be drawn directly into these axes.\n - If `None`, a figure and a bounding axes is created and treated\n as the single axes case.\n\nn_cols : int, default=3\n The maximum number of columns in the grid plot. Only active when\n `ax` is a single axes or `None`.\n\nline_kw : dict, default=None\n Dict with keywords passed to the `matplotlib.pyplot.plot` call.\n For one-way partial dependence plots.\n\ncontour_kw : dict, default=None\n Dict with keywords passed to the `matplotlib.pyplot.contourf`\n call for two-way partial dependence plots.\n\nReturns\n-------\ndisplay : :class:`~sklearn.inspection.PartialDependenceDisplay`" - } - ], - "docstring": "Partial Dependence Plot (PDP).\n\nThis can also display individual partial dependencies which are often\nreferred to as: Individual Condition Expectation (ICE).\n\nIt is recommended to use\n:func:`~sklearn.inspection.plot_partial_dependence` to create a\n:class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are\nstored as attributes.\n\nRead more in\n:ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`\nand the :ref:`User Guide `.\n\n .. versionadded:: 0.22\n\nParameters\n----------\npd_results : list of Bunch\n Results of :func:`~sklearn.inspection.partial_dependence` for\n ``features``.\n\nfeatures : list of (int,) or list of (int, int)\n Indices of features for a given plot. A tuple of one integer will plot\n a partial dependence curve of one feature. A tuple of two integers will\n plot a two-way partial dependence curve as a contour plot.\n\nfeature_names : list of str\n Feature names corresponding to the indices in ``features``.\n\ntarget_idx : int\n\n - In a multiclass setting, specifies the class for which the PDPs\n should be computed. 
Note that for binary classification, the\n positive class (index 1) is always used.\n - In a multioutput setting, specifies the task for which the PDPs\n should be computed.\n\n Ignored in binary classification or classical regression settings.\n\npdp_lim : dict\n Global min and max average predictions, such that all plots will have\n the same scale and y limits. `pdp_lim[1]` is the global min and max for\n single partial dependence curves. `pdp_lim[2]` is the global min and\n max for two-way partial dependence curves.\n\ndeciles : dict\n Deciles for feature indices in ``features``.\n\nkind : {'average', 'individual', 'both'}, default='average'\n Whether to plot the partial dependence averaged across all the samples\n in the dataset or one line per sample or both.\n\n - ``kind='average'`` results in the traditional PD plot;\n - ``kind='individual'`` results in the ICE plot.\n\n Note that the fast ``method='recursion'`` option is only available for\n ``kind='average'``. Plotting individual dependencies requires using the\n slower ``method='brute'`` option.\n\n .. versionadded:: 0.24\n\nsubsample : float, int or None, default=1000\n Sampling for ICE curves when `kind` is 'individual' or 'both'.\n If float, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to be used to plot ICE curves. If int, represents the\n maximum absolute number of samples to use.\n\n Note that the full dataset is still used to calculate partial\n dependence when `kind='both'`.\n\n .. versionadded:: 0.24\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the selected samples when subsamples is not\n `None`. See :term:`Glossary ` for details.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nbounding_ax_ : matplotlib Axes or None\n If `ax` is an axes or None, the `bounding_ax_` is the axes where the\n grid of partial dependence plots are drawn. 
If `ax` is a list of axes\n or a numpy array of axes, `bounding_ax_` is None.\n\naxes_ : ndarray of matplotlib Axes\n If `ax` is an axes or None, `axes_[i, j]` is the axes on the i-th row\n and j-th column. If `ax` is a list of axes, `axes_[i]` is the i-th item\n in `ax`. Elements that are None correspond to a nonexisting axes in\n that position.\n\nlines_ : ndarray of matplotlib Artists\n If `ax` is an axes or None, `lines_[i, j]` is the partial dependence\n curve on the i-th row and j-th column. If `ax` is a list of axes,\n `lines_[i]` is the partial dependence curve corresponding to the i-th\n item in `ax`. Elements that are None correspond to a nonexisting axes\n or an axes that does not include a line plot.\n\ndeciles_vlines_ : ndarray of matplotlib LineCollection\n If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n representing the x axis deciles of the i-th row and j-th column. If\n `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n `ax`. Elements that are None correspond to a nonexisting axes or an\n axes that does not include a PDP plot.\n\n .. versionadded:: 0.23\n\ndeciles_hlines_ : ndarray of matplotlib LineCollection\n If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n representing the y axis deciles of the i-th row and j-th column. If\n `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n `ax`. Elements that are None correspond to a nonexisting axes or an\n axes that does not include a 2-way plot.\n\n .. versionadded:: 0.23\n\ncontours_ : ndarray of matplotlib Artists\n If `ax` is an axes or None, `contours_[i, j]` is the partial dependence\n plot on the i-th row and j-th column. If `ax` is a list of axes,\n `contours_[i]` is the partial dependence plot corresponding to the i-th\n item in `ax`. 
Elements that are None correspond to a nonexisting axes\n or an axes that does not include a contour plot.\n\nfigure_ : matplotlib Figure\n Figure containing partial dependence plots.\n\nSee Also\n--------\npartial_dependence : Compute Partial Dependence values.\nplot_partial_dependence : Plot Partial Dependence." - } - ], - "functions": [ - { - "name": "plot_partial_dependence", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A fitted estimator object implementing :term:`predict`, :term:`predict_proba`, or :term:`decision_function`. Multioutput-multiclass classifiers are not supported." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "``X`` is used to generate a grid of values for the target ``features`` (where the partial dependence will be evaluated), and also to generate values for the complement features when the `method` is `'brute'`." - }, - { - "name": "features", - "type": "Union[str, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target features for which to create the PDPs. If `features[i]` is an integer or a string, a one-way PDP is created; if `features[i]` is a tuple, a two-way PDP is created (only supported with `kind='average'`). Each tuple must be of size 2. if any entry is a string, then it must be in ``feature_names``." - }, - { - "name": "feature_names", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of each feature; `feature_names[i]` holds the name of the feature with index `i`. By default, the name of the feature corresponds to their numerical index for NumPy array and their column name for pandas dataframe." 
- }, - { - "name": "target", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "- In a multiclass setting, specifies the class for which the PDPs should be computed. Note that for binary classification, the positive class (index 1) is always used. - In a multioutput setting, specifies the task for which the PDPs should be computed. Ignored in binary classification or classical regression settings." - }, - { - "name": "response_method", - "type": "Literal['auto', 'predict_proba', 'decision_function']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. For regressors this parameter is ignored and the response is always the output of :term:`predict`. By default, :term:`predict_proba` is tried first and we revert to :term:`decision_function` if it doesn't exist. If ``method`` is `'recursion'`, the response is always the output of :term:`decision_function`." - }, - { - "name": "n_cols", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of columns in the grid plot. Only active when `ax` is a single axis or `None`." - }, - { - "name": "grid_resolution", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of equally spaced points on the axes of the plots, for each target feature." - }, - { - "name": "percentiles", - "type": "Tuple[float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The lower and upper percentile used to create the extreme values for the PDP axes. Must be in [0, 1]." 
- }, - { - "name": "method", - "type": "str", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The method used to calculate the averaged predictions: - `'recursion'` is only supported for some tree-based estimators (namely :class:`~sklearn.ensemble.GradientBoostingClassifier`, :class:`~sklearn.ensemble.GradientBoostingRegressor`, :class:`~sklearn.ensemble.HistGradientBoostingClassifier`, :class:`~sklearn.ensemble.HistGradientBoostingRegressor`, :class:`~sklearn.tree.DecisionTreeRegressor`, :class:`~sklearn.ensemble.RandomForestRegressor` but is more efficient in terms of speed. With this method, the target response of a classifier is always the decision function, not the predicted probabilities. Since the `'recursion'` method implicitely computes the average of the ICEs by design, it is not compatible with ICE and thus `kind` must be `'average'`. - `'brute'` is supported for any estimator, but is more computationally intensive. - `'auto'`: the `'recursion'` is used for estimators that support it, and `'brute'` is used otherwise. Please see :ref:`this note ` for differences between the `'brute'` and `'recursion'` method." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of CPUs to use to compute the partial dependences. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbose output during PD computations." - }, - { - "name": "line_kw", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed to the ``matplotlib.pyplot.plot`` call. For one-way partial dependence plots." 
- }, - { - "name": "contour_kw", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call. For two-way partial dependence plots." - }, - { - "name": "ax", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "- If a single axis is passed in, it is treated as a bounding axes and a grid of partial dependence plots will be drawn within these bounds. The `n_cols` parameter controls the number of columns in the grid. - If an array-like of axes are passed in, the partial dependence plots will be drawn directly into these axes. - If `None`, a figure and a bounding axes is created and treated as the single axes case. .. versionadded:: 0.22" - }, - { - "name": "kind", - "type": "Literal['average', 'individual', 'both']", - "hasDefault": true, - "default": "'average'", - "limitation": null, - "ignored": false, - "docstring": " Whether to plot the partial dependence averaged across all the samples in the dataset or one line per sample or both. - ``kind='average'`` results in the traditional PD plot; - ``kind='individual'`` results in the ICE plot. Note that the fast ``method='recursion'`` option is only available for ``kind='average'``. Plotting individual dependencies requires using the slower ``method='brute'`` option. .. versionadded:: 0.24" - }, - { - "name": "subsample", - "type": "Optional[Union[int, float]]", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Sampling for ICE curves when `kind` is 'individual' or 'both'. If `float`, should be between 0.0 and 1.0 and represent the proportion of the dataset to be used to plot ICE curves. If `int`, represents the absolute number samples to use. Note that the full dataset is still used to calculate averaged partial dependence when `kind='both'`. .. 
versionadded:: 0.24" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of the selected samples when subsamples is not `None` and `kind` is either `'both'` or `'individual'`. See :term:`Glossary ` for details. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Partial dependence (PD) and individual conditional expectation (ICE)\nplots.\n\nPartial dependence plots, individual conditional expectation plots or an\noverlay of both of them can be plotted by setting the ``kind``\nparameter.\nThe ``len(features)`` plots are arranged in a grid with ``n_cols``\ncolumns. Two-way partial dependence plots are plotted as contour plots. The\ndeciles of the feature values will be shown with tick marks on the x-axes\nfor one-way plots, and on both axes for two-way plots.\n\nRead more in the :ref:`User Guide `.\n\n.. note::\n\n :func:`plot_partial_dependence` does not support using the same axes\n with multiple calls. To plot the the partial dependence for multiple\n estimators, please pass the axes created by the first call to the\n second call::\n\n >>> from sklearn.inspection import plot_partial_dependence\n >>> from sklearn.datasets import make_friedman1\n >>> from sklearn.linear_model import LinearRegression\n >>> from sklearn.ensemble import RandomForestRegressor\n >>> X, y = make_friedman1()\n >>> est1 = LinearRegression().fit(X, y)\n >>> est2 = RandomForestRegressor().fit(X, y)\n >>> disp1 = plot_partial_dependence(est1, X,\n ... [1, 2]) # doctest: +SKIP\n >>> disp2 = plot_partial_dependence(est2, X, [1, 2],\n ... ax=disp1.axes_) # doctest: +SKIP\n\n.. warning::\n\n For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n `'recursion'` method (used by default) will not account for the `init`\n predictor of the boosting process. 
In practice, this will produce\n the same values as `'brute'` up to a constant offset in the target\n response, provided that `init` is a constant estimator (which is the\n default). However, if `init` is not a constant estimator, the\n partial dependence values are incorrect for `'recursion'` because the\n offset will be sample-dependent. It is preferable to use the `'brute'`\n method. Note that this only applies to\n :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\nParameters\n----------\nestimator : BaseEstimator\n A fitted estimator object implementing :term:`predict`,\n :term:`predict_proba`, or :term:`decision_function`.\n Multioutput-multiclass classifiers are not supported.\n\nX : {array-like or dataframe} of shape (n_samples, n_features)\n ``X`` is used to generate a grid of values for the target\n ``features`` (where the partial dependence will be evaluated), and\n also to generate values for the complement features when the\n `method` is `'brute'`.\n\nfeatures : list of {int, str, pair of int, pair of str}\n The target features for which to create the PDPs.\n If `features[i]` is an integer or a string, a one-way PDP is created;\n if `features[i]` is a tuple, a two-way PDP is created (only supported\n with `kind='average'`). Each tuple must be of size 2.\n if any entry is a string, then it must be in ``feature_names``.\n\nfeature_names : array-like of shape (n_features,), dtype=str, default=None\n Name of each feature; `feature_names[i]` holds the name of the feature\n with index `i`.\n By default, the name of the feature corresponds to their numerical\n index for NumPy array and their column name for pandas dataframe.\n\ntarget : int, default=None\n - In a multiclass setting, specifies the class for which the PDPs\n should be computed. 
Note that for binary classification, the\n positive class (index 1) is always used.\n - In a multioutput setting, specifies the task for which the PDPs\n should be computed.\n\n Ignored in binary classification or classical regression settings.\n\nresponse_method : {'auto', 'predict_proba', 'decision_function'}, default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. For regressors\n this parameter is ignored and the response is always the output of\n :term:`predict`. By default, :term:`predict_proba` is tried first\n and we revert to :term:`decision_function` if it doesn't exist. If\n ``method`` is `'recursion'`, the response is always the output of\n :term:`decision_function`.\n\nn_cols : int, default=3\n The maximum number of columns in the grid plot. Only active when `ax`\n is a single axis or `None`.\n\ngrid_resolution : int, default=100\n The number of equally spaced points on the axes of the plots, for each\n target feature.\n\npercentiles : tuple of float, default=(0.05, 0.95)\n The lower and upper percentile used to create the extreme values\n for the PDP axes. Must be in [0, 1].\n\nmethod : str, default='auto'\n The method used to calculate the averaged predictions:\n\n - `'recursion'` is only supported for some tree-based estimators\n (namely\n :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n :class:`~sklearn.tree.DecisionTreeRegressor`,\n :class:`~sklearn.ensemble.RandomForestRegressor`\n but is more efficient in terms of speed.\n With this method, the target response of a\n classifier is always the decision function, not the predicted\n probabilities. 
Since the `'recursion'` method implicitely computes\n the average of the ICEs by design, it is not compatible with ICE and\n thus `kind` must be `'average'`.\n\n - `'brute'` is supported for any estimator, but is more\n computationally intensive.\n\n - `'auto'`: the `'recursion'` is used for estimators that support it,\n and `'brute'` is used otherwise.\n\n Please see :ref:`this note ` for\n differences between the `'brute'` and `'recursion'` method.\n\nn_jobs : int, default=None\n The number of CPUs to use to compute the partial dependences.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n Verbose output during PD computations.\n\nline_kw : dict, default=None\n Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\n For one-way partial dependence plots.\n\ncontour_kw : dict, default=None\n Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\n For two-way partial dependence plots.\n\nax : Matplotlib axes or array-like of Matplotlib axes, default=None\n - If a single axis is passed in, it is treated as a bounding axes\n and a grid of partial dependence plots will be drawn within\n these bounds. The `n_cols` parameter controls the number of\n columns in the grid.\n - If an array-like of axes are passed in, the partial dependence\n plots will be drawn directly into these axes.\n - If `None`, a figure and a bounding axes is created and treated\n as the single axes case.\n\n .. versionadded:: 0.22\n\nkind : {'average', 'individual', 'both'}, default='average'\n Whether to plot the partial dependence averaged across all the samples\n in the dataset or one line per sample or both.\n\n - ``kind='average'`` results in the traditional PD plot;\n - ``kind='individual'`` results in the ICE plot.\n\n Note that the fast ``method='recursion'`` option is only available for\n ``kind='average'``. 
Plotting individual dependencies requires using the\n slower ``method='brute'`` option.\n\n .. versionadded:: 0.24\n\nsubsample : float, int or None, default=1000\n Sampling for ICE curves when `kind` is 'individual' or 'both'.\n If `float`, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to be used to plot ICE curves. If `int`, represents the\n absolute number samples to use.\n\n Note that the full dataset is still used to calculate averaged partial\n dependence when `kind='both'`.\n\n .. versionadded:: 0.24\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the selected samples when subsamples is not\n `None` and `kind` is either `'both'` or `'individual'`.\n See :term:`Glossary ` for details.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ndisplay : :class:`~sklearn.inspection.PartialDependenceDisplay`\n\nSee Also\n--------\npartial_dependence : Compute Partial Dependence values.\nPartialDependenceDisplay : Partial Dependence visualization.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.ensemble import GradientBoostingRegressor\n>>> X, y = make_friedman1()\n>>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n>>> plot_partial_dependence(clf, X, [0, (0, 1)]) #doctest: +SKIP" - } - ] - }, - { - "name": "sklearn.inspection._plot", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.inspection._plot.tests.test_plot_partial_dependence", - "imports": [ - "import numpy as np", - "from scipy.stats.mstats import mquantiles", - "import pytest", - "from numpy.testing import assert_allclose", - "from sklearn.datasets import load_diabetes", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.ensemble import GradientBoostingRegressor", - "from sklearn.ensemble import GradientBoostingClassifier", - "from 
sklearn.linear_model import LinearRegression", - "from sklearn.utils._testing import _convert_container", - "from sklearn.inspection import plot_partial_dependence" - ], - "classes": [], - "functions": [ - { - "name": "diabetes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "clf_diabetes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_kind", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_str_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_custom_axes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_passing_numpy_axes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_incorrent_num_axes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_with_same_axes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_feature_name_reuse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_multiclass_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_does_not_override_ylabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_partial_dependence_subsampling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_dependence_overwrite_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that make sure that we can overwrite the label of the PDP plot" - } - ] - }, - { - "name": "sklearn.inspection._plot.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.linear_model.setup", - "imports": [ - "import os", - "import numpy", - "from sklearn._build_utils import gen_from_templates", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model._base", - "imports": [ - "from abc import ABCMeta", - "from abc import 
abstractmethod", - "import numbers", - "import warnings", - "import numpy as np", - "import scipy.sparse as sp", - "from scipy import linalg", - "from scipy import optimize", - "from scipy import sparse", - "from scipy.special import expit", - "from joblib import Parallel", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import MultiOutputMixin", - "from utils import check_array", - "from utils.validation import FLOAT_DTYPES", - "from utils.validation import _deprecate_positional_args", - "from utils import check_random_state", - "from utils.extmath import safe_sparse_dot", - "from utils.sparsefuncs import mean_variance_axis", - "from utils.sparsefuncs import inplace_column_scale", - "from utils.fixes import sparse_lsqr", - "from utils._seq_dataset import ArrayDataset32", - "from utils._seq_dataset import CSRDataset32", - "from utils._seq_dataset import ArrayDataset64", - "from utils._seq_dataset import CSRDataset64", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.fixes import delayed", - "from preprocessing import normalize as f_normalize" - ], - "classes": [ - { - "name": "LinearModel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit model." - }, - { - "name": "_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the linear model.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\nC : array, shape (n_samples,)\n Returns predicted values." - }, - { - "name": "_set_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set the intercept_\n " - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for Linear Models" - }, - { - "name": "LinearClassifierMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict confidence scores for samples.\n\nThe confidence score for a sample is proportional to the signed\ndistance of that sample to the hyperplane.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\narray, shape=(n_samples,) if n_classes == 2 else (n_samples, n_classes)\n Confidence scores per (sample, class) combination. In the binary\n case, confidence score for self.classes_[1] where >0 means this\n class would be predicted." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class labels for samples in X.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\nC : array, shape [n_samples]\n Predicted class label per sample." - }, - { - "name": "_predict_proba_lr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Probability estimation for OvR logistic regression.\n\nPositive class probabilities are computed as\n1. / (1. + np.exp(-self.decision_function(X)));\nmulticlass is handled by normalizing that over all classes." - } - ], - "docstring": "Mixin for linear classifiers.\n\nHandles prediction for sparse and dense X." - }, - { - "name": "SparseCoefMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "densify", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Convert coefficient matrix to dense array format.\n\nConverts the ``coef_`` member (back) to a numpy.ndarray. This is the\ndefault format of ``coef_`` and is required for fitting, so calling\nthis method is only required on models that have previously been\nsparsified; otherwise, it is a no-op.\n\nReturns\n-------\nself\n Fitted estimator." - }, - { - "name": "sparsify", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Convert coefficient matrix to sparse format.\n\nConverts the ``coef_`` member to a scipy.sparse matrix, which for\nL1-regularized models can be much more memory- and storage-efficient\nthan the usual numpy.ndarray representation.\n\nThe ``intercept_`` member is not converted.\n\nReturns\n-------\nself\n Fitted estimator.\n\nNotes\n-----\nFor non-sparse models, i.e. 
when there are not many zeros in ``coef_``,\nthis may actually *increase* memory usage, so use this method with\ncare. A rule of thumb is that the number of zero elements, which can\nbe computed with ``(coef_ == 0).sum()``, must be more than 50% for this\nto provide significant benefits.\n\nAfter calling this method, further fitting with the partial_fit\nmethod (if any) will not work until you call densify." - } - ], - "docstring": "Mixin for converting coef_ to and from CSR format.\n\nL1-regularizing estimators should inherit this." - }, - { - "name": "LinearRegression", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to False, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This will only provide speedup for n_targets > 1 and sufficient large problems. 
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, forces the coefficients to be positive. This option is only supported for dense arrays. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. Will be cast to X's dtype if necessary" - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample .. versionadded:: 0.17 parameter *sample_weight* support to LinearRegression." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit linear model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to X's dtype if necessary\n\nsample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample\n\n .. versionadded:: 0.17\n parameter *sample_weight* support to LinearRegression.\n\nReturns\n-------\nself : returns an instance of self." 
- } - ], - "docstring": "Ordinary least squares Linear Regression.\n\nLinearRegression fits a linear model with coefficients w = (w1, ..., wp)\nto minimize the residual sum of squares between the observed targets in\nthe dataset, and the targets predicted by the linear approximation.\n\nParameters\n----------\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to False, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This will only provide\n speedup for n_targets > 1 and sufficient large problems.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\npositive : bool, default=False\n When set to ``True``, forces the coefficients to be positive. This\n option is only supported for dense arrays.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncoef_ : array of shape (n_features, ) or (n_targets, n_features)\n Estimated coefficients for the linear regression problem.\n If multiple targets are passed during the fit (y 2D), this\n is a 2D array of shape (n_targets, n_features), while if only\n one target is passed, this is a 1D array of length n_features.\n\nrank_ : int\n Rank of matrix `X`. Only available when `X` is dense.\n\nsingular_ : array of shape (min(X, y),)\n Singular values of `X`. 
Only available when `X` is dense.\n\nintercept_ : float or array of shape (n_targets,)\n Independent term in the linear model. Set to 0.0 if\n `fit_intercept = False`.\n\nSee Also\n--------\nRidge : Ridge regression addresses some of the\n problems of Ordinary Least Squares by imposing a penalty on the\n size of the coefficients with l2 regularization.\nLasso : The Lasso is a linear model that estimates\n sparse coefficients with l1 regularization.\nElasticNet : Elastic-Net is a linear regression\n model trained with both l1 and l2 -norm regularization of the\n coefficients.\n\nNotes\n-----\nFrom the implementation point of view, this is just plain Ordinary\nLeast Squares (scipy.linalg.lstsq) or Non Negative Least Squares\n(scipy.optimize.nnls) wrapped as a predictor object.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LinearRegression\n>>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])\n>>> # y = 1 * x_0 + 2 * x_1 + 3\n>>> y = np.dot(X, np.array([1, 2])) + 3\n>>> reg = LinearRegression().fit(X, y)\n>>> reg.score(X, y)\n1.0\n>>> reg.coef_\narray([1., 2.])\n>>> reg.intercept_\n3.0000...\n>>> reg.predict(np.array([[3, 5]]))\narray([16.])" - } - ], - "functions": [ - { - "name": "make_dataset", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." 
- }, - { - "name": "sample_weight", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weight of each sample" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for dataset shuffling and noise. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Create ``Dataset`` abstraction for sparse and dense inputs.\n\nThis also returns the ``intercept_decay`` which is different\nfor sparse datasets.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Training data\n\ny : array-like, shape (n_samples, )\n Target values.\n\nsample_weight : numpy array of shape (n_samples,)\n The weight of each sample\n\nrandom_state : int, RandomState instance or None (default)\n Determines random number generation for dataset shuffling and noise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ndataset\n The ``Dataset`` abstraction\nintercept_decay\n The intercept decay" - }, - { - "name": "_preprocess_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Center and scale data.\n\nCenters data to have mean zero along axis 0. If fit_intercept=False or if\nthe X is a sparse matrix, no centering is done, but normalization can still\nbe applied. The function returns the statistics necessary to reconstruct\nthe input data, which are X_offset, y_offset, X_scale, such that the output\n\n X = (X - X_offset) / X_scale\n\nX_scale is the L2 norm of X - X_offset. If sample_weight is not None,\nthen the weighted mean of X and y is zero, and not the mean itself. 
If\nreturn_mean=True, the mean, eventually weighted, is returned, independently\nof whether X was centered (option used for optimization with sparse data in\ncoordinate_descend).\n\nThis is here because nearly all linear models will want their data to be\ncentered. This function also systematically makes y consistent with X.dtype" - }, - { - "name": "_rescale_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Rescale data sample-wise by square root of sample_weight.\n\nFor many linear models, this enables easy support for sample_weight.\n\nReturns\n-------\nX_rescaled : {array-like, sparse matrix}\n\ny_rescaled : {array-like, sparse matrix}" - }, - { - "name": "_pre_fit", - "decorators": [], - "parameters": [ - { - "name": "order", - "type": "Optional[Literal['F', 'C']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether X and y will be forced to be fortran or c-style. Only relevant if sample_weight is not None." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Aux function used at beginning of fit in linear models\n\nParameters\n----------\norder : 'F', 'C' or None, default=None\n Whether X and y will be forced to be fortran or c-style. Only relevant\n if sample_weight is not None." 
- } - ] - }, - { - "name": "sklearn.linear_model._bayes", - "imports": [ - "from math import log", - "import numpy as np", - "from scipy import linalg", - "from _base import LinearModel", - "from _base import _rescale_data", - "from base import RegressorMixin", - "from utils.extmath import fast_logdet", - "from scipy.linalg import pinvh", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "BayesianRidge", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations. Should be greater than or equal to 1." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Stop the algorithm if w has converged." - }, - { - "name": "alpha_1", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Hyper-parameter : shape parameter for the Gamma distribution prior over the alpha parameter." - }, - { - "name": "alpha_2", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the alpha parameter." - }, - { - "name": "lambda_1", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Hyper-parameter : shape parameter for the Gamma distribution prior over the lambda parameter." 
- }, - { - "name": "lambda_2", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the lambda parameter." - }, - { - "name": "alpha_init", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial value for alpha (precision of the noise). If not set, alpha_init is 1/Var(y). .. versionadded:: 0.22" - }, - { - "name": "lambda_init", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial value for lambda (precision of the weights). If not set, lambda_init is 1. .. versionadded:: 0.22" - }, - { - "name": "compute_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, compute the log marginal likelihood at each iteration of the optimization." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. The intercept is not treated as a probabilistic parameter and thus has no associated variance. If set to False, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." 
- }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbose mode when fitting the model." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. Will be cast to X's dtype if necessary" - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample .. versionadded:: 0.20 parameter *sample_weight* support to BayesianRidge." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Training data\ny : ndarray of shape (n_samples,)\n Target values. Will be cast to X's dtype if necessary\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Individual weights for each sample\n\n .. versionadded:: 0.20\n parameter *sample_weight* support to BayesianRidge.\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." 
- }, - { - "name": "return_std", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the standard deviation of posterior prediction." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the linear model.\n\nIn addition to the mean of the predictive distribution, also its\nstandard deviation can be returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\nreturn_std : bool, default=False\n Whether to return the standard deviation of posterior prediction.\n\nReturns\n-------\ny_mean : array-like of shape (n_samples,)\n Mean of predictive distribution of query points.\n\ny_std : array-like of shape (n_samples,)\n Standard deviation of predictive distribution of query points." - }, - { - "name": "_update_coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update posterior mean and compute corresponding rmse.\n\nPosterior mean is given by coef_ = scaled_sigma_ * X.T * y where\nscaled_sigma_ = (lambda_/alpha_ * np.eye(n_features)\n + np.dot(X.T, X))^-1" - }, - { - "name": "_log_marginal_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Log marginal likelihood." - } - ], - "docstring": "Bayesian ridge regression.\n\nFit a Bayesian ridge model. See the Notes section for details on this\nimplementation and the optimization of the regularization parameters\nlambda (precision of the weights) and alpha (precision of the noise).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_iter : int, default=300\n Maximum number of iterations. 
Should be greater than or equal to 1.\n\ntol : float, default=1e-3\n Stop the algorithm if w has converged.\n\nalpha_1 : float, default=1e-6\n Hyper-parameter : shape parameter for the Gamma distribution prior\n over the alpha parameter.\n\nalpha_2 : float, default=1e-6\n Hyper-parameter : inverse scale parameter (rate parameter) for the\n Gamma distribution prior over the alpha parameter.\n\nlambda_1 : float, default=1e-6\n Hyper-parameter : shape parameter for the Gamma distribution prior\n over the lambda parameter.\n\nlambda_2 : float, default=1e-6\n Hyper-parameter : inverse scale parameter (rate parameter) for the\n Gamma distribution prior over the lambda parameter.\n\nalpha_init : float, default=None\n Initial value for alpha (precision of the noise).\n If not set, alpha_init is 1/Var(y).\n\n .. versionadded:: 0.22\n\nlambda_init : float, default=None\n Initial value for lambda (precision of the weights).\n If not set, lambda_init is 1.\n\n .. versionadded:: 0.22\n\ncompute_score : bool, default=False\n If True, compute the log marginal likelihood at each iteration of the\n optimization.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model.\n The intercept is not treated as a probabilistic parameter\n and thus has no associated variance. If set\n to False, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nverbose : bool, default=False\n Verbose mode when fitting the model.\n\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n Coefficients of the regression model (mean of distribution)\n\nintercept_ : float\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\nalpha_ : float\n Estimated precision of the noise.\n\nlambda_ : float\n Estimated precision of the weights.\n\nsigma_ : array-like of shape (n_features, n_features)\n Estimated variance-covariance matrix of the weights\n\nscores_ : array-like of shape (n_iter_+1,)\n If computed_score is True, value of the log marginal likelihood (to be\n maximized) at each iteration of the optimization. The array starts\n with the value of the log marginal likelihood obtained for the initial\n values of alpha and lambda and ends with the value obtained for the\n estimated alpha and lambda.\n\nn_iter_ : int\n The actual number of iterations to reach the stopping criterion.\n\nX_offset_ : float\n If `normalize=True`, offset subtracted for centering data to a\n zero mean.\n\nX_scale_ : float\n If `normalize=True`, parameter used to scale data to a unit\n standard deviation.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.BayesianRidge()\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\nBayesianRidge()\n>>> clf.predict([[1, 1]])\narray([1.])\n\nNotes\n-----\nThere exist several strategies to perform Bayesian ridge regression. 
This\nimplementation is based on the algorithm described in Appendix A of\n(Tipping, 2001) where updates of the regularization parameters are done as\nsuggested in (MacKay, 1992). Note that according to A New\nView of Automatic Relevance Determination (Wipf and Nagarajan, 2008) these\nupdate rules do not guarantee that the marginal likelihood is increasing\nbetween two consecutive iterations of the optimization.\n\nReferences\n----------\nD. J. C. MacKay, Bayesian Interpolation, Computation and Neural Systems,\nVol. 4, No. 3, 1992.\n\nM. E. Tipping, Sparse Bayesian Learning and the Relevance Vector Machine,\nJournal of Machine Learning Research, Vol. 1, 2001." - }, - { - "name": "ARDRegression", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Stop the algorithm if w has converged." - }, - { - "name": "alpha_1", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Hyper-parameter : shape parameter for the Gamma distribution prior over the alpha parameter." - }, - { - "name": "alpha_2", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the alpha parameter." - }, - { - "name": "lambda_1", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Hyper-parameter : shape parameter for the Gamma distribution prior over the lambda parameter." 
- }, - { - "name": "lambda_2", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the lambda parameter." - }, - { - "name": "compute_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, compute the objective function at each step of the model." - }, - { - "name": "threshold_lambda", - "type": "float", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "threshold for removing (pruning) weights with high precision from the computation." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbose mode when fitting the model." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values (integers). Will be cast to X's dtype if necessary" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the ARDRegression model according to the given training data\nand parameters.\n\nIterative procedure to maximize the evidence\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\ny : array-like of shape (n_samples,)\n Target values (integers). Will be cast to X's dtype if necessary\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "_update_sigma_woodbury", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_update_sigma", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." - }, - { - "name": "return_std", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the standard deviation of posterior prediction." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the linear model.\n\nIn addition to the mean of the predictive distribution, also its\nstandard deviation can be returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\nreturn_std : bool, default=False\n Whether to return the standard deviation of posterior prediction.\n\nReturns\n-------\ny_mean : array-like of shape (n_samples,)\n Mean of predictive distribution of query points.\n\ny_std : array-like of shape (n_samples,)\n Standard deviation of predictive distribution of query points." - } - ], - "docstring": "Bayesian ARD regression.\n\nFit the weights of a regression model, using an ARD prior. The weights of\nthe regression model are assumed to be in Gaussian distributions.\nAlso estimate the parameters lambda (precisions of the distributions of the\nweights) and alpha (precision of the distribution of the noise).\nThe estimation is done by an iterative procedures (Evidence Maximization)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_iter : int, default=300\n Maximum number of iterations.\n\ntol : float, default=1e-3\n Stop the algorithm if w has converged.\n\nalpha_1 : float, default=1e-6\n Hyper-parameter : shape parameter for the Gamma distribution prior\n over the alpha parameter.\n\nalpha_2 : float, default=1e-6\n Hyper-parameter : inverse scale parameter (rate parameter) for the\n Gamma distribution prior over the alpha parameter.\n\nlambda_1 : float, default=1e-6\n Hyper-parameter : shape parameter for the Gamma distribution prior\n over the lambda parameter.\n\nlambda_2 : float, default=1e-6\n Hyper-parameter : inverse scale parameter (rate parameter) for the\n Gamma distribution prior over the lambda parameter.\n\ncompute_score : bool, default=False\n If True, compute the objective function at each step of the model.\n\nthreshold_lambda : float, default=10 000\n threshold for removing 
(pruning) weights with high precision from\n the computation.\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nverbose : bool, default=False\n Verbose mode when fitting the model.\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n Coefficients of the regression model (mean of distribution)\n\nalpha_ : float\n estimated precision of the noise.\n\nlambda_ : array-like of shape (n_features,)\n estimated precisions of the weights.\n\nsigma_ : array-like of shape (n_features, n_features)\n estimated variance-covariance matrix of the weights\n\nscores_ : float\n if computed, value of the objective function (to be maximized)\n\nintercept_ : float\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\nX_offset_ : float\n If `normalize=True`, offset subtracted for centering data to a\n zero mean.\n\nX_scale_ : float\n If `normalize=True`, parameter used to scale data to a unit\n standard deviation.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.ARDRegression()\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\nARDRegression()\n>>> clf.predict([[1, 1]])\narray([1.])\n\nNotes\n-----\nFor an example, see :ref:`examples/linear_model/plot_ard.py\n`.\n\nReferences\n----------\nD. J. C. MacKay, Bayesian nonlinear modeling for the prediction\ncompetition, ASHRAE Transactions, 1994.\n\nR. 
Salakhutdinov, Lecture notes on Statistical Machine Learning,\nhttp://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture2.pdf#page=15\nTheir beta is our ``self.alpha_``\nTheir alpha is our ``self.lambda_``\nARD is a little different than the slide: only dimensions/features for\nwhich ``self.lambda_ < self.threshold_lambda`` are kept and the rest are\ndiscarded." - } - ], - "functions": [] - }, - { - "name": "sklearn.linear_model._coordinate_descent", - "imports": [ - "import sys", - "import warnings", - "import numbers", - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numpy as np", - "from scipy import sparse", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "from _base import LinearModel", - "from _base import _pre_fit", - "from base import RegressorMixin", - "from base import MultiOutputMixin", - "from _base import _preprocess_data", - "from utils import check_array", - "from utils.validation import check_random_state", - "from model_selection import check_cv", - "from utils.extmath import safe_sparse_dot", - "from utils.fixes import _astype_copy_false", - "from utils.fixes import _joblib_parallel_args", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.validation import column_or_1d", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from None import _cd_fast as cd_fast" - ], - "classes": [ - { - "name": "ElasticNet", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the penalty terms. Defaults to 1.0. See the notes for the exact mathematical meaning of this parameter. 
``alpha = 0`` is equivalent to an ordinary least square, solved by the :class:`LinearRegression` object. For numerical reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised. Given this, you should use the :class:`LinearRegression` object." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The ElasticNet mixing parameter, with ``0 <= l1_ratio <= 1``. For ``l1_ratio = 0`` the penalty is an L2 penalty. ``For l1_ratio = 1`` it is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the intercept should be estimated or not. If ``False``, the data is assumed to be already centered." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. The Gram matrix can also be passed as argument. For sparse input this option is always ``True`` to preserve sparsity." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." 
- }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, forces the coefficients to be positive." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator that selects a random feature to update. Used when ``selection`` == 'random'. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "selection", - "type": "Literal['cyclic', 'random']", - "hasDefault": true, - "default": "'cyclic'", - "limitation": null, - "ignored": false, - "docstring": "If set to 'random', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target. Will be cast to X's dtype if necessary." - }, - { - "name": "sample_weight", - "type": "Union[ArrayLike, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weight. .. versionadded:: 0.23" - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Allow to bypass several input checking. Don't use this parameter unless you know what you do." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit model with coordinate descent.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of (n_samples, n_features)\n Data.\n\ny : {ndarray, sparse matrix} of shape (n_samples,) or (n_samples, n_targets)\n Target. Will be cast to X's dtype if necessary.\n\nsample_weight : float or array-like of shape (n_samples,), default=None\n Sample weight.\n\n .. versionadded:: 0.23\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nNotes\n-----\n\nCoordinate descent is an algorithm that considers each column of\ndata at a time hence it will automatically convert the X input\nas a Fortran-contiguous numpy array if necessary.\n\nTo avoid memory re-allocation it is advised to allocate the\ninitial data in memory directly using that format." 
- }, - { - "name": "sparse_coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sparse representation of the fitted `coef_`." - }, - { - "name": "_decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decision function of the linear model.\n\nParameters\n----------\nX : numpy array or scipy.sparse matrix of shape (n_samples, n_features)\n\nReturns\n-------\nT : ndarray of shape (n_samples,)\n The predicted decision function." - } - ], - "docstring": "Linear regression with combined L1 and L2 priors as regularizer.\n\nMinimizes the objective function::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\n a * L1 + b * L2\n\nwhere::\n\n alpha = a + b and l1_ratio = a / (a + b)\n\nThe parameter l1_ratio corresponds to alpha in the glmnet R package while\nalpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio\n= 1 is the lasso penalty. Currently, l1_ratio <= 0.01 is not reliable,\nunless you supply your own sequence of alpha.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the penalty terms. Defaults to 1.0.\n See the notes for the exact mathematical meaning of this\n parameter. ``alpha = 0`` is equivalent to an ordinary least square,\n solved by the :class:`LinearRegression` object. 
For numerical\n reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.\n Given this, you should use the :class:`LinearRegression` object.\n\nl1_ratio : float, default=0.5\n The ElasticNet mixing parameter, with ``0 <= l1_ratio <= 1``. For\n ``l1_ratio = 0`` the penalty is an L2 penalty. ``For l1_ratio = 1`` it\n is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a\n combination of L1 and L2.\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If ``False``, the\n data is assumed to be already centered.\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool or array-like of shape (n_features, n_features), default=False\n Whether to use a precomputed Gram matrix to speed up\n calculations. 
The Gram matrix can also be passed as argument.\n For sparse input this option is always ``True`` to preserve sparsity.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\npositive : bool, default=False\n When set to ``True``, forces the coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. 
This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\nsparse_coef_ : sparse matrix of shape (n_features,) or (n_tasks, n_features)\n Sparse representation of the `coef_`.\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : list of int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n Given param alpha, the dual gaps at the end of the optimization,\n same shape as each observation of y.\n\nExamples\n--------\n>>> from sklearn.linear_model import ElasticNet\n>>> from sklearn.datasets import make_regression\n\n>>> X, y = make_regression(n_features=2, random_state=0)\n>>> regr = ElasticNet(random_state=0)\n>>> regr.fit(X, y)\nElasticNet(random_state=0)\n>>> print(regr.coef_)\n[18.83816048 64.55968825]\n>>> print(regr.intercept_)\n1.451...\n>>> print(regr.predict([[0, 0]]))\n[1.451...]\n\n\nNotes\n-----\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nSee Also\n--------\nElasticNetCV : Elastic net model with best model selection by\n cross-validation.\nSGDRegressor : Implements elastic net regression with incremental training.\nSGDClassifier : Implements logistic regression with elastic net penalty\n (``SGDClassifier(loss=\"log\", penalty=\"elasticnet\")``)." - }, - { - "name": "Lasso", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the L1 term. Defaults to 1.0. 
``alpha = 0`` is equivalent to an ordinary least square, solved by the :class:`LinearRegression` object. For numerical reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised. Given this, you should use the :class:`LinearRegression` object." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to False, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument. For sparse input this option is always ``True`` to preserve sparsity." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." 
- }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, forces the coefficients to be positive." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator that selects a random feature to update. Used when ``selection`` == 'random'. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "selection", - "type": "Literal['cyclic', 'random']", - "hasDefault": true, - "default": "'cyclic'", - "limitation": null, - "ignored": false, - "docstring": "If set to 'random', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Linear Model trained with L1 prior as regularizer (aka the Lasso)\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nTechnically the Lasso model is optimizing the same objective function as\nthe Elastic Net with ``l1_ratio=1.0`` (no L2 penalty).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the L1 term. Defaults to 1.0.\n ``alpha = 0`` is equivalent to an ordinary least square, solved\n by the :class:`LinearRegression` object. For numerical\n reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.\n Given this, you should use the :class:`LinearRegression` object.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to False, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : 'auto', bool or array-like of shape (n_features, n_features), default=False\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument. 
For sparse input\n this option is always ``True`` to preserve sparsity.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\npositive : bool, default=False\n When set to ``True``, forces the coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. 
This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n Given param alpha, the dual gaps at the end of the optimization,\n same shape as each observation of y.\n\nsparse_coef_ : sparse matrix of shape (n_features, 1) or (n_targets, n_features)\n Readonly property derived from ``coef_``.\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : int or list of int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.Lasso(alpha=0.1)\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\nLasso(alpha=0.1)\n>>> print(clf.coef_)\n[0.85 0. ]\n>>> print(clf.intercept_)\n0.15...\n\nSee Also\n--------\nlars_path\nlasso_path\nLassoLars\nLassoCV\nLassoLarsCV\nsklearn.decomposition.sparse_encode\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array." - }, - { - "name": "LinearModelCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Model to be fitted after the best alpha has been determined." - }, - { - "name": "_is_multitask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Bool indicating if class is meant for multidimensional target." 
- }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. Pass directly as Fortran-contiguous data to avoid unnecessary memory duplication. If y is mono-output, X can be sparse." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit linear model with coordinate descent.\n\nFit is on grid of alphas and best alpha estimated by cross-validation.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data\n to avoid unnecessary memory duplication. If y is mono-output,\n X can be sparse.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values." - } - ], - "docstring": "Base class for iterative model fitting along a regularization path." - }, - { - "name": "LassoCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Length of the path. ``eps=1e-3`` means that ``alpha_min / alpha_max = 1e-3``." - }, - { - "name": "n_alphas", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of alphas along the regularization path." - }, - { - "name": "alphas", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of alphas where to compute the models. If ``None`` alphas are set automatically." 
- }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." 
- }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - int, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Amount of verbosity." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPUs to use during the cross validation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If positive, restrict regression coefficients to be positive." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator that selects a random feature to update. Used when ``selection`` == 'random'. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- }, - { - "name": "selection", - "type": "Literal['cyclic', 'random']", - "hasDefault": true, - "default": "'cyclic'", - "limitation": null, - "ignored": false, - "docstring": "If set to 'random', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_multitask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Lasso linear model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe best model is selected by cross-validation.\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path.\n\nalphas : ndarray, default=None\n List of alphas where to compute the models.\n If ``None`` alphas are set automatically.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : 'auto', bool or array-like of shape (n_features, n_features), default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nverbose : bool or int, default=False\n Amount of verbosity.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\npositive : bool, default=False\n If positive, restrict regression coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nalpha_ : float\n The amount of penalization chosen by cross validation.\n\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds)\n Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,)\n The grid of alphas used for fitting.\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n The dual gap at the end of the optimization for the optimal alpha\n (``alpha_``).\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\nExamples\n--------\n>>> from sklearn.linear_model import LassoCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(noise=4, random_state=0)\n>>> reg = LassoCV(cv=5, random_state=0).fit(X, y)\n>>> reg.score(X, y)\n0.9993...\n>>> reg.predict(X[:1,])\narray([-78.4951...])\n\nNotes\n-----\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_model_selection.py\n`.\n\nTo avoid unnecessary memory duplication the X argument of the fit 
method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nSee Also\n--------\nlars_path\nlasso_path\nLassoLars\nLasso\nLassoLarsCV" - }, - { - "name": "ElasticNetCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "l1_ratio", - "type": "Union[List[float], float]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "float between 0 and 1 passed to ElasticNet (scaling between l1 and l2 penalties). For ``l1_ratio = 0`` the penalty is an L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2 This parameter can be a list, in which case the different values are tested by cross-validation and the one giving the best prediction score is used. Note that a good choice of list of values for l1_ratio is often to put more values close to 1 (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7, .9, .95, .99, 1]``." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Length of the path. ``eps=1e-3`` means that ``alpha_min / alpha_max = 1e-3``." - }, - { - "name": "n_alphas", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of alphas along the regularization path, used for each l1_ratio." - }, - { - "name": "alphas", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of alphas where to compute the models. If None alphas are set automatically." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. 
data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - int, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." 
- }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Amount of verbosity." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPUs to use during the cross validation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, forces the coefficients to be positive." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator that selects a random feature to update. Used when ``selection`` == 'random'. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "selection", - "type": "Literal['cyclic', 'random']", - "hasDefault": true, - "default": "'cyclic'", - "limitation": null, - "ignored": false, - "docstring": "If set to 'random', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_multitask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Elastic Net model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nl1_ratio : float or list of float, default=0.5\n float between 0 and 1 passed to ElasticNet (scaling between\n l1 and l2 penalties). For ``l1_ratio = 0``\n the penalty is an L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty.\n For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2\n This parameter can be a list, in which case the different\n values are tested by cross-validation and the one giving the best\n prediction score is used. Note that a good choice of list of\n values for l1_ratio is often to put more values close to 1\n (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n .9, .95, .99, 1]``.\n\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path, used for each l1_ratio.\n\nalphas : ndarray, default=None\n List of alphas where to compute the models.\n If None alphas are set automatically.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : 'auto', bool or array-like of shape (n_features, n_features), default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nverbose : bool or int, default=0\n Amount of verbosity.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\npositive : bool, default=False\n When set to ``True``, forces the coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nalpha_ : float\n The amount of penalization chosen by cross validation.\n\nl1_ratio_ : float\n The compromise between l1 and l2 penalization chosen by\n cross validation.\n\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\nintercept_ : float or ndarray of shape (n_targets, n_features)\n Independent term in the decision function.\n\nmse_path_ : ndarray of shape (n_l1_ratio, n_alpha, n_folds)\n Mean square error for the test set on each fold, varying l1_ratio and\n alpha.\n\nalphas_ : ndarray of shape (n_alphas,) or (n_l1_ratio, n_alphas)\n The grid of alphas used for fitting, for each l1_ratio.\n\ndual_gap_ : float\n The dual gaps at the end of the optimization for the optimal alpha.\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\nExamples\n--------\n>>> from sklearn.linear_model import ElasticNetCV\n>>> from sklearn.datasets import make_regression\n\n>>> X, y = make_regression(n_features=2, random_state=0)\n>>> regr = ElasticNetCV(cv=5, random_state=0)\n>>> regr.fit(X, y)\nElasticNetCV(cv=5, random_state=0)\n>>> print(regr.alpha_)\n0.199...\n>>> 
print(regr.intercept_)\n0.398...\n>>> print(regr.predict([[0, 0]]))\n[0.398...]\n\n\nNotes\n-----\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_model_selection.py\n`.\n\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nThe parameter l1_ratio corresponds to alpha in the glmnet R package\nwhile alpha corresponds to the lambda parameter in glmnet.\nMore specifically, the optimization objective is::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\n a * L1 + b * L2\n\nfor::\n\n alpha = a + b and l1_ratio = a / (a + b).\n\nSee Also\n--------\nenet_path\nElasticNet" - }, - { - "name": "MultiTaskElasticNet", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the L1/L2 term. Defaults to 1.0." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The ElasticNet mixing parameter, with 0 < l1_ratio <= 1. For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it is an L2 penalty. For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." 
- }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator that selects a random feature to update. Used when ``selection`` == 'random'. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- }, - { - "name": "selection", - "type": "Literal['cyclic', 'random']", - "hasDefault": true, - "default": "'cyclic'", - "limitation": null, - "ignored": false, - "docstring": "If set to 'random', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target. Will be cast to X's dtype if necessary." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit MultiTaskElasticNet model with coordinate descent\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data.\ny : ndarray of shape (n_samples, n_tasks)\n Target. Will be cast to X's dtype if necessary.\n\nNotes\n-----\n\nCoordinate descent is an algorithm that considers each column of\ndata at a time hence it will automatically convert the X input\nas a Fortran-contiguous numpy array if necessary.\n\nTo avoid memory re-allocation it is advised to allocate the\ninitial data in memory directly using that format." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Multi-task ElasticNet model trained with L1/L2 mixed-norm as\nregularizer.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = sum_i sqrt(sum_j W_ij ^ 2)\n\ni.e. the sum of norms of each row.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\nl1_ratio : float, default=0.5\n The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it\n is an L2 penalty.\n For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_tasks,)\n Independent term in decision function.\n\ncoef_ : ndarray of shape (n_tasks, n_features)\n Parameter vector (W in the cost function formula). 
If a 1D y is\n passed in at fit (non multi-task usage), ``coef_`` is then a 1D array.\n Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\ndual_gap_ : float\n The dual gaps at the end of the optimization.\n\neps_ : float\n The tolerance scaled scaled by the variance of the target `y`.\n\nsparse_coef_ : sparse matrix of shape (n_features,) or (n_tasks, n_features)\n Sparse representation of the `coef_`.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskElasticNet(alpha=0.1)\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])\nMultiTaskElasticNet(alpha=0.1)\n>>> print(clf.coef_)\n[[0.45663524 0.45612256]\n [0.45663524 0.45612256]]\n>>> print(clf.intercept_)\n[0.0872422 0.0872422]\n\nSee Also\n--------\nMultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in\n cross-validation.\nElasticNet\nMultiTaskLasso\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays." - }, - { - "name": "MultiTaskLasso", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the L1/L2 term. Defaults to 1.0." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." 
- }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator that selects a random feature to update. Used when ``selection`` == 'random'. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- }, - { - "name": "selection", - "type": "Literal['cyclic', 'random']", - "hasDefault": true, - "default": "'cyclic'", - "limitation": null, - "ignored": false, - "docstring": "If set to 'random', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. 
This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_tasks, n_features)\n Parameter vector (W in the cost function formula).\n Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nintercept_ : ndarray of shape (n_tasks,)\n Independent term in decision function.\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\ndual_gap_ : ndarray of shape (n_alphas,)\n The dual gaps at the end of the optimization for each alpha.\n\neps_ : float\n The tolerance scaled scaled by the variance of the target `y`.\n\nsparse_coef_ : sparse matrix of shape (n_features,) or (n_tasks, n_features)\n Sparse representation of the `coef_`.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskLasso(alpha=0.1)\n>>> clf.fit([[0, 1], [1, 2], [2, 4]], [[0, 0], [1, 1], [2, 3]])\nMultiTaskLasso(alpha=0.1)\n>>> print(clf.coef_)\n[[0. 0.60809415]\n[0. 0.94592424]]\n>>> print(clf.intercept_)\n[-0.41888636 -0.87382323]\n\nSee Also\n--------\nMultiTaskLasso : Multi-task L1/L2 Lasso with built-in cross-validation.\nLasso\nMultiTaskElasticNet\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays." - }, - { - "name": "MultiTaskElasticNetCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "l1_ratio", - "type": "Union[List[float], float]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The ElasticNet mixing parameter, with 0 < l1_ratio <= 1. For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it is an L2 penalty. 
For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2. This parameter can be a list, in which case the different values are tested by cross-validation and the one giving the best prediction score is used. Note that a good choice of list of values for l1_ratio is often to put more values close to 1 (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7, .9, .95, .99, 1]``" - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Length of the path. ``eps=1e-3`` means that ``alpha_min / alpha_max = 1e-3``." - }, - { - "name": "n_alphas", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of alphas along the regularization path." - }, - { - "name": "alphas", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of alphas where to compute the models. If not provided, set automatically." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - int, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Amount of verbosity." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPUs to use during the cross validation. Note that this is used only if multiple values for l1_ratio are given. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator that selects a random feature to update. Used when ``selection`` == 'random'. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "selection", - "type": "Literal['cyclic', 'random']", - "hasDefault": true, - "default": "'cyclic'", - "limitation": null, - "ignored": false, - "docstring": "If set to 'random', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_multitask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^Fro_2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15\n\nParameters\n----------\nl1_ratio : float or list of float, default=0.5\n The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n For l1_ratio = 1 the penalty is an L1/L2 penalty. 
For l1_ratio = 0 it\n is an L2 penalty.\n For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n This parameter can be a list, in which case the different\n values are tested by cross-validation and the one giving the best\n prediction score is used. Note that a good choice of list of\n values for l1_ratio is often to put more values close to 1\n (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n .9, .95, .99, 1]``\n\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path.\n\nalphas : array-like, default=None\n List of alphas where to compute the models.\n If not provided, set automatically.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, 
:class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nverbose : bool or int, default=0\n Amount of verbosity.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation. Note that this is\n used only if multiple values for l1_ratio are given.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. 
This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_tasks,)\n Independent term in decision function.\n\ncoef_ : ndarray of shape (n_tasks, n_features)\n Parameter vector (W in the cost function formula).\n Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nalpha_ : float\n The amount of penalization chosen by cross validation.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds) or (n_l1_ratio, n_alphas, n_folds)\n Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,) or (n_l1_ratio, n_alphas)\n The grid of alphas used for fitting, for each l1_ratio.\n\nl1_ratio_ : float\n Best l1_ratio obtained by cross-validation.\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\ndual_gap_ : float\n The dual gap at the end of the optimization for the optimal alpha.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskElasticNetCV(cv=3)\n>>> clf.fit([[0,0], [1, 1], [2, 2]],\n... [[0, 0], [1, 1], [2, 2]])\nMultiTaskElasticNetCV(cv=3)\n>>> print(clf.coef_)\n[[0.52875032 0.46958558]\n [0.52875032 0.46958558]]\n>>> print(clf.intercept_)\n[0.00166409 0.00166409]\n\nSee Also\n--------\nMultiTaskElasticNet\nElasticNetCV\nMultiTaskLassoCV\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays." - }, - { - "name": "MultiTaskLassoCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Length of the path. 
``eps=1e-3`` means that ``alpha_min / alpha_max = 1e-3``." - }, - { - "name": "n_alphas", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of alphas along the regularization path." - }, - { - "name": "alphas", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of alphas where to compute the models. If not provided, set automatically." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``." 
- }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - int, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Amount of verbosity." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPUs to use during the cross validation. Note that this is used only if multiple values for l1_ratio are given. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator that selects a random feature to update. Used when ``selection`` == 'random'. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- }, - { - "name": "selection", - "type": "Literal['cyclic', 'random']", - "hasDefault": true, - "default": "'cyclic'", - "limitation": null, - "ignored": false, - "docstring": "If set to 'random', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_multitask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskLasso is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^Fro_2 + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15\n\nParameters\n----------\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path.\n\nalphas : array-like, default=None\n List of alphas where to compute the models.\n If not provided, set automatically.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nverbose : bool or int, default=False\n Amount of verbosity.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation. Note that this is\n used only if multiple values for l1_ratio are given.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. 
Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_tasks,)\n Independent term in decision function.\n\ncoef_ : ndarray of shape (n_tasks, n_features)\n Parameter vector (W in the cost function formula).\n Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nalpha_ : float\n The amount of penalization chosen by cross validation.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds)\n Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,)\n The grid of alphas used for fitting.\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\ndual_gap_ : float\n The dual gap at the end of the optimization for the optimal alpha.\n\nExamples\n--------\n>>> from sklearn.linear_model import MultiTaskLassoCV\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.metrics import r2_score\n>>> X, y = make_regression(n_targets=2, noise=4, random_state=0)\n>>> reg = MultiTaskLassoCV(cv=5, random_state=0).fit(X, y)\n>>> r2_score(y, reg.predict(X))\n0.9994...\n>>> reg.alpha_\n0.5713...\n>>> reg.predict(X[:1,])\narray([[153.7971..., 94.9015...]])\n\nSee Also\n--------\nMultiTaskElasticNet\nElasticNetCV\nMultiTaskElasticNetCV\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays." 
- } - ], - "functions": [ - { - "name": "_set_order", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "order", - "type": "Optional[Literal['C', 'F']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If 'C', dense arrays are returned as C-ordered, sparse matrices in csr format. If 'F', dense arrays are return as F-ordered, sparse matrices in csc format." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Change the order of X and y if necessary.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Target values.\n\norder : {None, 'C', 'F'}\n If 'C', dense arrays are returned as C-ordered, sparse matrices in csr\n format. If 'F', dense arrays are return as F-ordered, sparse matrices\n in csc format.\n\nReturns\n-------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data with guaranteed order.\n\ny : ndarray of shape (n_samples,)\n Target values with guaranteed order." - }, - { - "name": "_alpha_grid", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. 
Pass directly as Fortran-contiguous data to avoid unnecessary memory duplication" - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "Xy", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Xy = np.dot(X.T, y) that can be precomputed." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The elastic net mixing parameter, with ``0 < l1_ratio <= 1``. For ``l1_ratio = 0`` the penalty is an L2 penalty. (currently not supported) ``For l1_ratio = 1`` it is an L1 penalty. For ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Length of the path. ``eps=1e-3`` means that ``alpha_min / alpha_max = 1e-3``" - }, - { - "name": "n_alphas", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of alphas along the regularization path" - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to fit an intercept or not" - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." 
- }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the grid of alpha values for elastic net parameter search\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data to avoid\n unnecessary memory duplication\n\ny : ndarray of shape (n_samples,)\n Target values\n\nXy : array-like of shape (n_features,), default=None\n Xy = np.dot(X.T, y) that can be precomputed.\n\nl1_ratio : float, default=1.0\n The elastic net mixing parameter, with ``0 < l1_ratio <= 1``.\n For ``l1_ratio = 0`` the penalty is an L2 penalty. (currently not\n supported) ``For l1_ratio = 1`` it is an L1 penalty. For\n ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2.\n\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``\n\nn_alphas : int, default=100\n Number of alphas along the regularization path\n\nfit_intercept : bool, default=True\n Whether to fit an intercept or not\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "lasso_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. 
Pass directly as Fortran-contiguous data to avoid unnecessary memory duplication. If ``y`` is mono-output then ``X`` can be sparse." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Length of the path. ``eps=1e-3`` means that ``alpha_min / alpha_max = 1e-3``" - }, - { - "name": "n_alphas", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of alphas along the regularization path" - }, - { - "name": "alphas", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of alphas where to compute the models. If ``None`` alphas are set automatically" - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument." - }, - { - "name": "Xy", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Xy = np.dot(X.T, y) that can be precomputed. It is useful only when the Gram matrix is precomputed." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "coef_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial values of the coefficients." 
- }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Amount of verbosity." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "whether to return the number of iterations or not." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If set to True, forces coefficients to be positive. (Only allowed when ``y.ndim == 1``)." - }, - { - "name": "**params", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "keyword arguments passed to the coordinate descent solver." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Lasso path with coordinate descent\n\nThe Lasso optimization function varies for mono and multi-outputs.\n\nFor mono-output tasks it is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nFor multi-output tasks it is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data to avoid\n unnecessary memory duplication. If ``y`` is mono-output then ``X``\n can be sparse.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values\n\neps : float, default=1e-3\n Length of the path. 
``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``\n\nn_alphas : int, default=100\n Number of alphas along the regularization path\n\nalphas : ndarray, default=None\n List of alphas where to compute the models.\n If ``None`` alphas are set automatically\n\nprecompute : 'auto', bool or array-like of shape (n_features, n_features), default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nXy : array-like of shape (n_features,) or (n_features, n_outputs), default=None\n Xy = np.dot(X.T, y) that can be precomputed. It is useful\n only when the Gram matrix is precomputed.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ncoef_init : ndarray of shape (n_features, ), default=None\n The initial values of the coefficients.\n\nverbose : bool or int, default=False\n Amount of verbosity.\n\nreturn_n_iter : bool, default=False\n whether to return the number of iterations or not.\n\npositive : bool, default=False\n If set to True, forces coefficients to be positive.\n (Only allowed when ``y.ndim == 1``).\n\n**params : kwargs\n keyword arguments passed to the coordinate descent solver.\n\nReturns\n-------\nalphas : ndarray of shape (n_alphas,)\n The alphas along the path where models are computed.\n\ncoefs : ndarray of shape (n_features, n_alphas) or (n_outputs, n_features, n_alphas)\n Coefficients along the path.\n\ndual_gaps : ndarray of shape (n_alphas,)\n The dual gaps at the end of the optimization for each alpha.\n\nn_iters : list of int\n The number of iterations taken by the coordinate descent optimizer to\n reach the specified tolerance for each alpha.\n\nNotes\n-----\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_coordinate_descent_path.py\n`.\n\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nNote 
that in certain cases, the Lars solver may be significantly\nfaster to implement this functionality. In particular, linear\ninterpolation can be used to retrieve model coefficients between the\nvalues output by lars_path\n\nExamples\n--------\n\nComparing lasso_path and lars_path with interpolation:\n\n>>> X = np.array([[1, 2, 3.1], [2.3, 5.4, 4.3]]).T\n>>> y = np.array([1, 2, 3.1])\n>>> # Use lasso_path to compute a coefficient path\n>>> _, coef_path, _ = lasso_path(X, y, alphas=[5., 1., .5])\n>>> print(coef_path)\n[[0. 0. 0.46874778]\n [0.2159048 0.4425765 0.23689075]]\n\n>>> # Now use lars_path and 1D linear interpolation to compute the\n>>> # same path\n>>> from sklearn.linear_model import lars_path\n>>> alphas, active, coef_path_lars = lars_path(X, y, method='lasso')\n>>> from scipy import interpolate\n>>> coef_path_continuous = interpolate.interp1d(alphas[::-1],\n... coef_path_lars[:, ::-1])\n>>> print(coef_path_continuous([5., 1., .5]))\n[[0. 0. 0.46915237]\n [0.2159048 0.4425765 0.23668876]]\n\nSee Also\n--------\nlars_path\nLasso\nLassoLars\nLassoCV\nLassoLarsCV\nsklearn.decomposition.sparse_encode" - }, - { - "name": "enet_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. Pass directly as Fortran-contiguous data to avoid unnecessary memory duplication. If ``y`` is mono-output then ``X`` can be sparse." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Number between 0 and 1 passed to elastic net (scaling between l1 and l2 penalties). ``l1_ratio=1`` corresponds to the Lasso." 
- }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Length of the path. ``eps=1e-3`` means that ``alpha_min / alpha_max = 1e-3``." - }, - { - "name": "n_alphas", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of alphas along the regularization path." - }, - { - "name": "alphas", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of alphas where to compute the models. If None alphas are set automatically." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument." - }, - { - "name": "Xy", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Xy = np.dot(X.T, y) that can be precomputed. It is useful only when the Gram matrix is precomputed." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - }, - { - "name": "coef_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial values of the coefficients." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Amount of verbosity." 
- }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the number of iterations or not." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If set to True, forces coefficients to be positive. (Only allowed when ``y.ndim == 1``)." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If set to False, the input validation checks are skipped (including the Gram matrix when provided). It is assumed that they are handled by the caller." - }, - { - "name": "**params", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Keyword arguments passed to the coordinate descent solver." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute elastic net path with coordinate descent.\n\nThe elastic net optimization function varies for mono and multi-outputs.\n\nFor mono-output tasks it is::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nFor multi-output tasks it is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^Fro_2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data to avoid\n unnecessary memory duplication. 
If ``y`` is mono-output then ``X``\n can be sparse.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nl1_ratio : float, default=0.5\n Number between 0 and 1 passed to elastic net (scaling between\n l1 and l2 penalties). ``l1_ratio=1`` corresponds to the Lasso.\n\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path.\n\nalphas : ndarray, default=None\n List of alphas where to compute the models.\n If None alphas are set automatically.\n\nprecompute : 'auto', bool or array-like of shape (n_features, n_features), default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nXy : array-like of shape (n_features,) or (n_features, n_outputs), default=None\n Xy = np.dot(X.T, y) that can be precomputed. It is useful\n only when the Gram matrix is precomputed.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ncoef_init : ndarray of shape (n_features, ), default=None\n The initial values of the coefficients.\n\nverbose : bool or int, default=False\n Amount of verbosity.\n\nreturn_n_iter : bool, default=False\n Whether to return the number of iterations or not.\n\npositive : bool, default=False\n If set to True, forces coefficients to be positive.\n (Only allowed when ``y.ndim == 1``).\n\ncheck_input : bool, default=True\n If set to False, the input validation checks are skipped (including the\n Gram matrix when provided). 
It is assumed that they are handled\n by the caller.\n\n**params : kwargs\n Keyword arguments passed to the coordinate descent solver.\n\nReturns\n-------\nalphas : ndarray of shape (n_alphas,)\n The alphas along the path where models are computed.\n\ncoefs : ndarray of shape (n_features, n_alphas) or (n_outputs, n_features, n_alphas)\n Coefficients along the path.\n\ndual_gaps : ndarray of shape (n_alphas,)\n The dual gaps at the end of the optimization for each alpha.\n\nn_iters : list of int\n The number of iterations taken by the coordinate descent optimizer to\n reach the specified tolerance for each alpha.\n (Is returned when ``return_n_iter`` is set to True).\n\nSee Also\n--------\nMultiTaskElasticNet\nMultiTaskElasticNetCV\nElasticNet\nElasticNetCV\n\nNotes\n-----\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_coordinate_descent_path.py\n`." - }, - { - "name": "_path_residuals", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "train", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The indices of the train set." - }, - { - "name": "test", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The indices of the test set." - }, - { - "name": "path", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Function returning a list of models on the path. See enet_path for an example of signature." 
- }, - { - "name": "path_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the path function." - }, - { - "name": "alphas", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of float that is used for cross-validation. If not provided, computed using 'path'." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "float between 0 and 1 passed to ElasticNet (scaling between l1 and l2 penalties). For ``l1_ratio = 0`` the penalty is an L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2." - }, - { - "name": "X_order", - "type": "Literal['F', 'C']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The order of the arrays expected by the path function to avoid memory copies." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dtype of the arrays expected by the path function to avoid memory copies." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the MSE for the models computed by 'path'.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\ntrain : list of indices\n The indices of the train set.\n\ntest : list of indices\n The indices of the test set.\n\npath : callable\n Function returning a list of models on the path. 
See\n enet_path for an example of signature.\n\npath_params : dictionary\n Parameters passed to the path function.\n\nalphas : array-like, default=None\n Array of float that is used for cross-validation. If not\n provided, computed using 'path'.\n\nl1_ratio : float, default=1\n float between 0 and 1 passed to ElasticNet (scaling between\n l1 and l2 penalties). For ``l1_ratio = 0`` the penalty is an\n L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty. For ``0\n < l1_ratio < 1``, the penalty is a combination of L1 and L2.\n\nX_order : {'F', 'C'}, default=None\n The order of the arrays expected by the path function to\n avoid memory copies.\n\ndtype : a numpy dtype, default=None\n The dtype of the arrays expected by the path function to\n avoid memory copies." - } - ] - }, - { - "name": "sklearn.linear_model._huber", - "imports": [ - "import numpy as np", - "from scipy import optimize", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from _base import LinearModel", - "from utils import axis0_safe_slice", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from utils.extmath import safe_sparse_dot", - "from utils.optimize import _check_optimize_result" - ], - "classes": [ - { - "name": "HuberRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The parameter epsilon controls the number of samples that should be classified as outliers. The smaller the epsilon, the more robust it is to outliers." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations that ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` should run for." 
- }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This is useful if the stored attributes of a previously used model has to be reused. If set to False, then the coefficients will be rewritten for every call to fit. See :term:`the Glossary `." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to fit the intercept. This can be set to False if the data is already centered around the origin." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-05", - "limitation": null, - "ignored": false, - "docstring": "The iteration will stop when ``max{|proj g_i | i = 1, ..., n}`` <= ``tol`` where pg_i is the i-th component of the projected gradient." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector relative to X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weight given to each sample." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\ny : array-like, shape (n_samples,)\n Target vector relative to X.\n\nsample_weight : array-like, shape (n_samples,)\n Weight given to each sample.\n\nReturns\n-------\nself : object" - } - ], - "docstring": "Linear regression model that is robust to outliers.\n\nThe Huber Regressor optimizes the squared loss for the samples where\n``|(y - X'w) / sigma| < epsilon`` and the absolute loss for the samples\nwhere ``|(y - X'w) / sigma| > epsilon``, where w and sigma are parameters\nto be optimized. The parameter sigma makes sure that if y is scaled up\nor down by a certain factor, one does not need to rescale epsilon to\nachieve the same robustness. Note that this does not take into account\nthe fact that the different features of X may be of different scales.\n\nThis makes sure that the loss function is not heavily influenced by the\noutliers while not completely ignoring their effect.\n\nRead more in the :ref:`User Guide `\n\n.. versionadded:: 0.18\n\nParameters\n----------\nepsilon : float, greater than 1.0, default=1.35\n The parameter epsilon controls the number of samples that should be\n classified as outliers. The smaller the epsilon, the more robust it is\n to outliers.\n\nmax_iter : int, default=100\n Maximum number of iterations that\n ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` should run for.\n\nalpha : float, default=0.0001\n Regularization parameter.\n\nwarm_start : bool, default=False\n This is useful if the stored attributes of a previously used model\n has to be reused. 
If set to False, then the coefficients will\n be rewritten for every call to fit.\n See :term:`the Glossary `.\n\nfit_intercept : bool, default=True\n Whether or not to fit the intercept. This can be set to False\n if the data is already centered around the origin.\n\ntol : float, default=1e-05\n The iteration will stop when\n ``max{|proj g_i | i = 1, ..., n}`` <= ``tol``\n where pg_i is the i-th component of the projected gradient.\n\nAttributes\n----------\ncoef_ : array, shape (n_features,)\n Features got by optimizing the Huber loss.\n\nintercept_ : float\n Bias.\n\nscale_ : float\n The value by which ``|y - X'w - c|`` is scaled down.\n\nn_iter_ : int\n Number of iterations that\n ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` has run for.\n\n .. versionchanged:: 0.20\n\n In SciPy <= 1.0.0 the number of lbfgs iterations may exceed\n ``max_iter``. ``n_iter_`` will now report at most ``max_iter``.\n\noutliers_ : array, shape (n_samples,)\n A boolean mask which is set to True where the samples are identified\n as outliers.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import HuberRegressor, LinearRegression\n>>> from sklearn.datasets import make_regression\n>>> rng = np.random.RandomState(0)\n>>> X, y, coef = make_regression(\n... n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0)\n>>> X[:4] = rng.uniform(10, 20, (4, 2))\n>>> y[:4] = rng.uniform(10, 20, 4)\n>>> huber = HuberRegressor().fit(X, y)\n>>> huber.score(X, y)\n-7.284...\n>>> huber.predict(X[:1,])\narray([806.7200...])\n>>> linear = LinearRegression().fit(X, y)\n>>> print(\"True coefficients:\", coef)\nTrue coefficients: [20.4923... 34.1698...]\n>>> print(\"Huber coefficients:\", huber.coef_)\nHuber coefficients: [17.7906... 31.0106...]\n>>> print(\"Linear Regression coefficients:\", linear.coef_)\nLinear Regression coefficients: [-1.9221... 7.0226...]\n\nReferences\n----------\n.. [1] Peter J. Huber, Elvezio M. 
Ronchetti, Robust Statistics\n Concomitant scale estimates, pg 172\n.. [2] Art B. Owen (2006), A robust hybrid of lasso and ridge regression.\n https://statweb.stanford.edu/~owen/reports/hhu.pdf" - } - ], - "functions": [ - { - "name": "_huber_loss_and_gradient", - "decorators": [], - "parameters": [ - { - "name": "w", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Feature vector. w[:n_features] gives the coefficients w[-1] gives the scale factor and if the intercept is fit w[-2] gives the intercept factor." - }, - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector." - }, - { - "name": "epsilon", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Robustness of the Huber estimator." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weight assigned to each sample." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the Huber loss and the gradient.\n\nParameters\n----------\nw : ndarray, shape (n_features + 1,) or (n_features + 2,)\n Feature vector.\n w[:n_features] gives the coefficients\n w[-1] gives the scale factor and if the intercept is fit w[-2]\n gives the intercept factor.\n\nX : ndarray of shape (n_samples, n_features)\n Input data.\n\ny : ndarray of shape (n_samples,)\n Target vector.\n\nepsilon : float\n Robustness of the Huber estimator.\n\nalpha : float\n Regularization parameter.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Weight assigned to each sample.\n\nReturns\n-------\nloss : float\n Huber loss.\n\ngradient : ndarray, shape (len(w))\n Returns the derivative of the Huber loss with respect to each\n coefficient, intercept and the scale as a vector." - } - ] - }, - { - "name": "sklearn.linear_model._least_angle", - "imports": [ - "from math import log", - "import sys", - "import warnings", - "import numpy as np", - "from scipy import linalg", - "from scipy import interpolate", - "from scipy.linalg.lapack import get_lapack_funcs", - "from joblib import Parallel", - "from _base import LinearModel", - "from base import RegressorMixin", - "from base import MultiOutputMixin", - "from utils import arrayfuncs", - "from utils import as_float_array", - "from utils import check_random_state", - "from model_selection import check_cv", - "from exceptions import ConvergenceWarning", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed" - ], - "classes": [ - { - "name": "Lars", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. 
If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Sets the verbosity amount." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument." - }, - { - "name": "n_nonzero_coefs", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Target number of non-zero coefficients. Use ``np.inf`` for no limit." - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." 
- }, - { - "name": "fit_path", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True the full path is stored in the ``coef_path_`` attribute. If you compute the solution for a large problem or many targets, setting ``fit_path`` to ``False`` will lead to a speedup, especially with a small alpha." - }, - { - "name": "jitter", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Upper bound on a uniform noise parameter to be added to the `y` values, to satisfy the model's assumption of one-at-a-time computations. Might help with stability. .. versionadded:: 0.23" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for jittering. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `. Ignored if `jitter` is None. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_gram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Auxiliary method to fit the model using X, y as training data" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." 
- }, - { - "name": "Xy", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Xy = np.dot(X.T, y) that can be precomputed. It is useful only when the Gram matrix is precomputed." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\nXy : array-like of shape (n_samples,) or (n_samples, n_targets), default=None\n Xy = np.dot(X.T, y) that can be precomputed. It is useful\n only when the Gram matrix is precomputed.\n\nReturns\n-------\nself : object\n returns an instance of self." - } - ], - "docstring": "Least Angle Regression model a.k.a. LAR\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool, 'auto' or array-like , default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nn_nonzero_coefs : int, default=500\n Target number of non-zero coefficients. 
Use ``np.inf`` for no limit.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nfit_path : bool, default=True\n If True the full path is stored in the ``coef_path_`` attribute.\n If you compute the solution for a large problem or many targets,\n setting ``fit_path`` to ``False`` will lead to a speedup, especially\n with a small alpha.\n\njitter : float, default=None\n Upper bound on a uniform noise parameter to be added to the\n `y` values, to satisfy the model's assumption of\n one-at-a-time computations. Might help with stability.\n\n .. versionadded:: 0.23\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for jittering. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `. Ignored if `jitter` is None.\n\n .. versionadded:: 0.23\n\nAttributes\n----------\nalphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller. If this is a list of array-like, the length of the outer\n list is `n_targets`.\n\nactive_ : list of shape (n_alphas,) or list of such lists\n Indices of active variables at the end of the path.\n If this is a list of list, the length of the outer list is `n_targets`.\n\ncoef_path_ : array-like of shape (n_features, n_alphas + 1) or list of such arrays\n The varying values of the coefficients along the path. It is not\n present if the ``fit_path`` parameter is ``False``. 
If this is a list\n of array-like, the length of the outer list is `n_targets`.\n\ncoef_ : array-like of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the formulation formula).\n\nintercept_ : float or array-like of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : array-like or int\n The number of iterations taken by lars_path to find the\n grid of alphas for each target.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> reg = linear_model.Lars(n_nonzero_coefs=1)\n>>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])\nLars(n_nonzero_coefs=1)\n>>> print(reg.coef_)\n[ 0. -1.11...]\n\nSee Also\n--------\nlars_path, LarsCV\nsklearn.decomposition.sparse_encode" - }, - { - "name": "LassoLars", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the penalty term. Defaults to 1.0. ``alpha = 0`` is equivalent to an ordinary least square, solved by :class:`LinearRegression`. For numerical reasons, using ``alpha = 0`` with the LassoLars object is not advised and you should prefer the LinearRegression object." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Sets the verbosity amount." 
- }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform." - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." - }, - { - "name": "fit_path", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True`` the full path is stored in the ``coef_path_`` attribute. 
If you compute the solution for a large problem or many targets, setting ``fit_path`` to ``False`` will lead to a speedup, especially with a small alpha." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Restrict coefficients to be >= 0. Be aware that you might want to remove fit_intercept which is set True by default. Under the positive restriction the model coefficients will not converge to the ordinary-least-squares solution for small values of alpha. Only coefficients up to the smallest alpha value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso algorithm are typically in congruence with the solution of the coordinate descent Lasso estimator." - }, - { - "name": "jitter", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Upper bound on a uniform noise parameter to be added to the `y` values, to satisfy the model's assumption of one-at-a-time computations. Might help with stability. .. versionadded:: 0.23" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for jittering. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `. Ignored if `jitter` is None. .. versionadded:: 0.23" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Lasso model fit with Least Angle Regression a.k.a. Lars\n\nIt is a Linear Model trained with an L1 prior as regularizer.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the penalty term. 
Defaults to 1.0.\n ``alpha = 0`` is equivalent to an ordinary least square, solved\n by :class:`LinearRegression`. For numerical reasons, using\n ``alpha = 0`` with the LassoLars object is not advised and you\n should prefer the LinearRegression object.\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool, 'auto' or array-like, default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nfit_path : bool, default=True\n If ``True`` the full path is stored in the ``coef_path_`` attribute.\n If you compute the solution for a large problem or many targets,\n setting ``fit_path`` to ``False`` will lead to a speedup, especially\n with a small alpha.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0. 
Be aware that you might want to\n remove fit_intercept which is set True by default.\n Under the positive restriction the model coefficients will not converge\n to the ordinary-least-squares solution for small values of alpha.\n Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n algorithm are typically in congruence with the solution of the\n coordinate descent Lasso estimator.\n\njitter : float, default=None\n Upper bound on a uniform noise parameter to be added to the\n `y` values, to satisfy the model's assumption of\n one-at-a-time computations. Might help with stability.\n\n .. versionadded:: 0.23\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for jittering. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `. Ignored if `jitter` is None.\n\n .. versionadded:: 0.23\n\nAttributes\n----------\nalphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller. If this is a list of array-like, the length of the outer\n list is `n_targets`.\n\nactive_ : list of length n_alphas or list of such lists\n Indices of active variables at the end of the path.\n If this is a list of list, the length of the outer list is `n_targets`.\n\ncoef_path_ : array-like of shape (n_features, n_alphas + 1) or list of such arrays\n If a list is passed it's expected to be one of n_targets such arrays.\n The varying values of the coefficients along the path. It is not\n present if the ``fit_path`` parameter is ``False``. 
If this is a list\n of array-like, the length of the outer list is `n_targets`.\n\ncoef_ : array-like of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the formulation formula).\n\nintercept_ : float or array-like of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : array-like or int\n The number of iterations taken by lars_path to find the\n grid of alphas for each target.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> reg = linear_model.LassoLars(alpha=0.01)\n>>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])\nLassoLars(alpha=0.01)\n>>> print(reg.coef_)\n[ 0. -0.963257...]\n\nSee Also\n--------\nlars_path\nlasso_path\nLasso\nLassoCV\nLassoLarsCV\nLassoLarsIC\nsklearn.decomposition.sparse_encode" - }, - { - "name": "LarsCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Sets the verbosity amount." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. 
If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix cannot be passed as argument since we will use only subsets of X." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "max_n_alphas", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of points on the path used to compute the residuals in the cross-validation" - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPUs to use during the cross validation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. 
Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, X will be copied; else, it may be overwritten." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nReturns\n-------\nself : object\n returns an instance of self." - } - ], - "docstring": "Cross-validated Least Angle Regression model.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool, 'auto' or array-like , default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram matrix\n cannot be passed as argument since we will use only subsets of X.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nmax_n_alphas : int, default=1000\n The maximum number of points on the path used to compute the\n residuals in the cross-validation\n\nn_jobs : int or None, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. 
Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nAttributes\n----------\nactive_ : list of length n_alphas or list of such lists\n Indices of active variables at the end of the path.\n If this is a list of lists, the outer list length is `n_targets`.\n\ncoef_ : array-like of shape (n_features,)\n parameter vector (w in the formulation formula)\n\nintercept_ : float\n independent term in decision function\n\ncoef_path_ : array-like of shape (n_features, n_alphas)\n the varying values of the coefficients along the path\n\nalpha_ : float\n the estimated regularization parameter alpha\n\nalphas_ : array-like of shape (n_alphas,)\n the different values of alpha along the path\n\ncv_alphas_ : array-like of shape (n_cv_alphas,)\n all the values of alpha along the path for the different folds\n\nmse_path_ : array-like of shape (n_folds, n_cv_alphas)\n the mean square error on left-out for each fold along the path\n (alpha values given by ``cv_alphas``)\n\nn_iter_ : array-like or int\n the number of iterations run by Lars with the optimal alpha.\n\nExamples\n--------\n>>> from sklearn.linear_model import LarsCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_samples=200, noise=4.0, random_state=0)\n>>> reg = LarsCV(cv=5).fit(X, y)\n>>> reg.score(X, y)\n0.9996...\n>>> reg.alpha_\n0.0254...\n>>> reg.predict(X[:1,])\narray([154.0842...])\n\nSee Also\n--------\nlars_path, LassoLars, LassoLarsCV" - }, - { - "name": "LassoLarsCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to calculate the intercept for this model. 
If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Sets the verbosity amount." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix cannot be passed as argument since we will use only subsets of X." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." 
- }, - { - "name": "max_n_alphas", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of points on the path used to compute the residuals in the cross-validation" - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPUs to use during the cross validation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Restrict coefficients to be >= 0. Be aware that you might want to remove fit_intercept which is set True by default. Under the positive restriction the model coefficients do not converge to the ordinary-least-squares solution for small values of alpha. Only coefficients up to the smallest alpha value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso algorithm are typically in congruence with the solution of the coordinate descent Lasso estimator. As a consequence using LassoLarsCV only makes sense for problems where a sparse solution is expected and/or reached." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Cross-validated Lasso, using the LARS algorithm.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool or 'auto' , default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram matrix\n cannot be passed as argument since we will use only subsets of X.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. 
versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nmax_n_alphas : int, default=1000\n The maximum number of points on the path used to compute the\n residuals in the cross-validation\n\nn_jobs : int or None, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0. Be aware that you might want to\n remove fit_intercept which is set True by default.\n Under the positive restriction the model coefficients do not converge\n to the ordinary-least-squares solution for small values of alpha.\n Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n algorithm are typically in congruence with the solution of the\n coordinate descent Lasso estimator.\n As a consequence using LassoLarsCV only makes sense for problems where\n a sparse solution is expected and/or reached.\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n parameter vector (w in the formulation formula)\n\nintercept_ : float\n independent term in decision function.\n\ncoef_path_ : array-like of shape (n_features, n_alphas)\n the varying values of the coefficients along the path\n\nalpha_ : float\n the estimated regularization parameter alpha\n\nalphas_ : array-like of shape (n_alphas,)\n the different values of alpha along the 
path\n\ncv_alphas_ : array-like of shape (n_cv_alphas,)\n all the values of alpha along the path for the different folds\n\nmse_path_ : array-like of shape (n_folds, n_cv_alphas)\n the mean square error on left-out for each fold along the path\n (alpha values given by ``cv_alphas``)\n\nn_iter_ : array-like or int\n the number of iterations run by Lars with the optimal alpha.\n\nactive_ : list of int\n Indices of active variables at the end of the path.\n\nExamples\n--------\n>>> from sklearn.linear_model import LassoLarsCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(noise=4.0, random_state=0)\n>>> reg = LassoLarsCV(cv=5).fit(X, y)\n>>> reg.score(X, y)\n0.9992...\n>>> reg.alpha_\n0.0484...\n>>> reg.predict(X[:1,])\narray([-77.8723...])\n\nNotes\n-----\n\nThe object solves the same problem as the LassoCV object. However,\nunlike the LassoCV, it find the relevant alphas values by itself.\nIn general, because of this property, it will be more stable.\nHowever, it is more fragile to heavily multicollinear datasets.\n\nIt is more efficient than the LassoCV if only a small number of\nfeatures are selected compared to the total number, for instance if\nthere are very few samples compared to the number of features.\n\nSee Also\n--------\nlars_path, LassoLars, LarsCV, LassoCV" - }, - { - "name": "LassoLarsIC", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "criterion", - "type": "Literal['bic', 'aic']", - "hasDefault": true, - "default": "'aic'", - "limitation": null, - "ignored": false, - "docstring": "The type of criterion to use." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." 
- }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Sets the verbosity amount." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[ArrayLike, bool, Literal['auto']]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform. Can be used for early stopping." - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." 
- }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Restrict coefficients to be >= 0. Be aware that you might want to remove fit_intercept which is set True by default. Under the positive restriction the model coefficients do not converge to the ordinary-least-squares solution for small values of alpha. Only coefficients up to the smallest alpha value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso algorithm are typically in congruence with the solution of the coordinate descent Lasso estimator. As a consequence using LassoLarsIC only makes sense for problems where a sparse solution is expected and/or reached." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "target values. Will be cast to X's dtype if necessary" - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If provided, this parameter will override the choice of copy_X made at instance creation. If ``True``, X will be copied; else, it may be overwritten." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n training data.\n\ny : array-like of shape (n_samples,)\n target values. 
Will be cast to X's dtype if necessary\n\ncopy_X : bool, default=None\n If provided, this parameter will override the choice\n of copy_X made at instance creation.\n If ``True``, X will be copied; else, it may be overwritten.\n\nReturns\n-------\nself : object\n returns an instance of self." - } - ], - "docstring": "Lasso model fit with Lars using BIC or AIC for model selection\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nAIC is the Akaike information criterion and BIC is the Bayes\nInformation criterion. Such criteria are useful to select the value\nof the regularization parameter by making a trade-off between the\ngoodness of fit and the complexity of the model. A good model should\nexplain well the data while being simple.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncriterion : {'bic' , 'aic'}, default='aic'\n The type of criterion to use.\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool, 'auto' or array-like, default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform. 
Can be used for\n early stopping.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0. Be aware that you might want to\n remove fit_intercept which is set True by default.\n Under the positive restriction the model coefficients do not converge\n to the ordinary-least-squares solution for small values of alpha.\n Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n algorithm are typically in congruence with the solution of the\n coordinate descent Lasso estimator.\n As a consequence using LassoLarsIC only makes sense for problems where\n a sparse solution is expected and/or reached.\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n parameter vector (w in the formulation formula)\n\nintercept_ : float\n independent term in decision function.\n\nalpha_ : float\n the alpha parameter chosen by the information criterion\n\nalphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller. If a list, it will be of length `n_targets`.\n\nn_iter_ : int\n number of iterations run by lars_path to find the grid of\n alphas.\n\ncriterion_ : array-like of shape (n_alphas,)\n The value of the information criteria ('aic', 'bic') across all\n alphas. The alpha which has the smallest information criterion is\n chosen. 
This value is larger by a factor of ``n_samples`` compared to\n Eqns. 2.15 and 2.16 in (Zou et al, 2007).\n\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> reg = linear_model.LassoLarsIC(criterion='bic')\n>>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])\nLassoLarsIC(criterion='bic')\n>>> print(reg.coef_)\n[ 0. -1.11...]\n\nNotes\n-----\nThe estimation of the number of degrees of freedom is given by:\n\n\"On the degrees of freedom of the lasso\"\nHui Zou, Trevor Hastie, and Robert Tibshirani\nAnn. Statist. Volume 35, Number 5 (2007), 2173-2192.\n\nhttps://en.wikipedia.org/wiki/Akaike_information_criterion\nhttps://en.wikipedia.org/wiki/Bayesian_information_criterion\n\nSee Also\n--------\nlars_path, LassoLars, LassoLarsCV" - } - ], - "functions": [ - { - "name": "lars_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Optional[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. Note that if X is None then the Gram matrix must be specified, i.e., cannot be None or False." - }, - { - "name": "y", - "type": "Optional[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input targets." - }, - { - "name": "Xy", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Xy = np.dot(X.T, y) that can be precomputed. It is useful only when the Gram matrix is precomputed." - }, - { - "name": "Gram", - "type": "Optional[Union[Literal['auto'], ArrayLike]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram matrix is precomputed from the given X, if there are more samples than features." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform, set to infinity for no limit." - }, - { - "name": "alpha_min", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Minimum correlation along the path. It corresponds to the regularization parameter alpha parameter in the Lasso." - }, - { - "name": "method", - "type": "Literal['lar', 'lasso']", - "hasDefault": true, - "default": "'lar'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the returned model. Select ``'lar'`` for Least Angle Regression, ``'lasso'`` for the Lasso." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, ``X`` is overwritten." - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - }, - { - "name": "copy_Gram", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, ``Gram`` is overwritten." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls output verbosity." - }, - { - "name": "return_path", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``return_path==True`` returns the entire path, else returns only the last point of the path." 
- }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the number of iterations." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Restrict coefficients to be >= 0. This option is only allowed with method 'lasso'. Note that the model coefficients will not converge to the ordinary-least-squares solution for small values of alpha. Only coefficients up to the smallest alpha value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso algorithm are typically in congruence with the solution of the coordinate descent lasso_path function." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Least Angle Regression or Lasso path using LARS algorithm [1]\n\nThe optimization objective for the case method='lasso' is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nin the case of method='lars', the objective function is only known in\nthe form of an implicit equation (see discussion in [1])\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : None or array-like of shape (n_samples, n_features)\n Input data. Note that if X is None then the Gram matrix must be\n specified, i.e., cannot be None or False.\n\ny : None or array-like of shape (n_samples,)\n Input targets.\n\nXy : array-like of shape (n_samples,) or (n_samples, n_targets), default=None\n Xy = np.dot(X.T, y) that can be precomputed. 
It is useful\n only when the Gram matrix is precomputed.\n\nGram : None, 'auto', array-like of shape (n_features, n_features), default=None\n Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram\n matrix is precomputed from the given X, if there are more samples\n than features.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform, set to infinity for no limit.\n\nalpha_min : float, default=0\n Minimum correlation along the path. It corresponds to the\n regularization parameter alpha parameter in the Lasso.\n\nmethod : {'lar', 'lasso'}, default='lar'\n Specifies the returned model. Select ``'lar'`` for Least Angle\n Regression, ``'lasso'`` for the Lasso.\n\ncopy_X : bool, default=True\n If ``False``, ``X`` is overwritten.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_Gram : bool, default=True\n If ``False``, ``Gram`` is overwritten.\n\nverbose : int, default=0\n Controls output verbosity.\n\nreturn_path : bool, default=True\n If ``return_path==True`` returns the entire path, else returns only the\n last point of the path.\n\nreturn_n_iter : bool, default=False\n Whether to return the number of iterations.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0.\n This option is only allowed with method 'lasso'. Note that the model\n coefficients will not converge to the ordinary-least-squares solution\n for small values of alpha. 
Only coefficients up to the smallest alpha\n value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\n the stepwise Lars-Lasso algorithm are typically in congruence with the\n solution of the coordinate descent lasso_path function.\n\nReturns\n-------\nalphas : array-like of shape (n_alphas + 1,)\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller.\n\nactive : array-like of shape (n_alphas,)\n Indices of active variables at the end of the path.\n\ncoefs : array-like of shape (n_features, n_alphas + 1)\n Coefficients along the path\n\nn_iter : int\n Number of iterations run. Returned only if return_n_iter is set\n to True.\n\nSee Also\n--------\nlars_path_gram\nlasso_path\nlasso_path_gram\nLassoLars\nLars\nLassoLarsCV\nLarsCV\nsklearn.decomposition.sparse_encode\n\nReferences\n----------\n.. [1] \"Least Angle Regression\", Efron et al.\n http://statweb.stanford.edu/~tibs/ftp/lars.pdf\n\n.. [2] `Wikipedia entry on the Least-angle regression\n `_\n\n.. [3] `Wikipedia entry on the Lasso\n `_" - }, - { - "name": "lars_path_gram", - "decorators": [], - "parameters": [ - { - "name": "Xy", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Xy = np.dot(X.T, y)." - }, - { - "name": "Gram", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gram = np.dot(X.T * X)." - }, - { - "name": "n_samples", - "type": "Union[float, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Equivalent size of sample." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform, set to infinity for no limit." - }, - { - "name": "alpha_min", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Minimum correlation along the path. It corresponds to the regularization parameter alpha parameter in the Lasso." - }, - { - "name": "method", - "type": "Literal['lar', 'lasso']", - "hasDefault": true, - "default": "'lar'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the returned model. Select ``'lar'`` for Least Angle Regression, ``'lasso'`` for the Lasso." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, ``X`` is overwritten." - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - }, - { - "name": "copy_Gram", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, ``Gram`` is overwritten." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls output verbosity." - }, - { - "name": "return_path", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``return_path==True`` returns the entire path, else returns only the last point of the path." 
- }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the number of iterations." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Restrict coefficients to be >= 0. This option is only allowed with method 'lasso'. Note that the model coefficients will not converge to the ordinary-least-squares solution for small values of alpha. Only coefficients up to the smallest alpha value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso algorithm are typically in congruence with the solution of the coordinate descent lasso_path function." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "lars_path in the sufficient stats mode [1]\n\nThe optimization objective for the case method='lasso' is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nin the case of method='lars', the objective function is only known in\nthe form of an implicit equation (see discussion in [1])\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nXy : array-like of shape (n_samples,) or (n_samples, n_targets)\n Xy = np.dot(X.T, y).\n\nGram : array-like of shape (n_features, n_features)\n Gram = np.dot(X.T * X).\n\nn_samples : int or float\n Equivalent size of sample.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform, set to infinity for no limit.\n\nalpha_min : float, default=0\n Minimum correlation along the path. It corresponds to the\n regularization parameter alpha parameter in the Lasso.\n\nmethod : {'lar', 'lasso'}, default='lar'\n Specifies the returned model. 
Select ``'lar'`` for Least Angle\n Regression, ``'lasso'`` for the Lasso.\n\ncopy_X : bool, default=True\n If ``False``, ``X`` is overwritten.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_Gram : bool, default=True\n If ``False``, ``Gram`` is overwritten.\n\nverbose : int, default=0\n Controls output verbosity.\n\nreturn_path : bool, default=True\n If ``return_path==True`` returns the entire path, else returns only the\n last point of the path.\n\nreturn_n_iter : bool, default=False\n Whether to return the number of iterations.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0.\n This option is only allowed with method 'lasso'. Note that the model\n coefficients will not converge to the ordinary-least-squares solution\n for small values of alpha. Only coefficients up to the smallest alpha\n value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\n the stepwise Lars-Lasso algorithm are typically in congruence with the\n solution of the coordinate descent lasso_path function.\n\nReturns\n-------\nalphas : array-like of shape (n_alphas + 1,)\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller.\n\nactive : array-like of shape (n_alphas,)\n Indices of active variables at the end of the path.\n\ncoefs : array-like of shape (n_features, n_alphas + 1)\n Coefficients along the path\n\nn_iter : int\n Number of iterations run. 
Returned only if return_n_iter is set\n to True.\n\nSee Also\n--------\nlars_path\nlasso_path\nlasso_path_gram\nLassoLars\nLars\nLassoLarsCV\nLarsCV\nsklearn.decomposition.sparse_encode\n\nReferences\n----------\n.. [1] \"Least Angle Regression\", Efron et al.\n http://statweb.stanford.edu/~tibs/ftp/lars.pdf\n\n.. [2] `Wikipedia entry on the Least-angle regression\n `_\n\n.. [3] `Wikipedia entry on the Lasso\n `_" - }, - { - "name": "_lars_path_solver", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Optional[NDArray]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. Note that if X is None then Gram must be specified, i.e., cannot be None or False." - }, - { - "name": "y", - "type": "Optional[NDArray]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input targets." - }, - { - "name": "Xy", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "`Xy = np.dot(X.T, y)` that can be precomputed. It is useful only when the Gram matrix is precomputed." - }, - { - "name": "Gram", - "type": "Optional[Union[Literal['auto'], ArrayLike]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed Gram matrix `(X' * X)`, if ``'auto'``, the Gram matrix is precomputed from the given X, if there are more samples than features." - }, - { - "name": "n_samples", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Equivalent size of sample. If `None`, it will be `n_samples`." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform, set to infinity for no limit." 
- }, - { - "name": "alpha_min", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Minimum correlation along the path. It corresponds to the regularization parameter alpha parameter in the Lasso." - }, - { - "name": "method", - "type": "Literal['lar', 'lasso']", - "hasDefault": true, - "default": "'lar'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the returned model. Select ``'lar'`` for Least Angle Regression, ``'lasso'`` for the Lasso." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, ``X`` is overwritten." - }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - }, - { - "name": "copy_Gram", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, ``Gram`` is overwritten." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls output verbosity." - }, - { - "name": "return_path", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``return_path==True`` returns the entire path, else returns only the last point of the path." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the number of iterations." 
- }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Restrict coefficients to be >= 0. This option is only allowed with method 'lasso'. Note that the model coefficients will not converge to the ordinary-least-squares solution for small values of alpha. Only coefficients up to the smallest alpha value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso algorithm are typically in congruence with the solution of the coordinate descent lasso_path function." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Least Angle Regression or Lasso path using LARS algorithm [1]\n\nThe optimization objective for the case method='lasso' is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nin the case of method='lars', the objective function is only known in\nthe form of an implicit equation (see discussion in [1])\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : None or ndarray of shape (n_samples, n_features)\n Input data. Note that if X is None then Gram must be specified,\n i.e., cannot be None or False.\n\ny : None or ndarray of shape (n_samples,)\n Input targets.\n\nXy : array-like of shape (n_samples,) or (n_samples, n_targets), default=None\n `Xy = np.dot(X.T, y)` that can be precomputed. It is useful\n only when the Gram matrix is precomputed.\n\nGram : None, 'auto' or array-like of shape (n_features, n_features), default=None\n Precomputed Gram matrix `(X' * X)`, if ``'auto'``, the Gram\n matrix is precomputed from the given X, if there are more samples\n than features.\n\nn_samples : int or float, default=None\n Equivalent size of sample. If `None`, it will be `n_samples`.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform, set to infinity for no limit.\n\nalpha_min : float, default=0\n Minimum correlation along the path. 
It corresponds to the\n regularization parameter alpha parameter in the Lasso.\n\nmethod : {'lar', 'lasso'}, default='lar'\n Specifies the returned model. Select ``'lar'`` for Least Angle\n Regression, ``'lasso'`` for the Lasso.\n\ncopy_X : bool, default=True\n If ``False``, ``X`` is overwritten.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_Gram : bool, default=True\n If ``False``, ``Gram`` is overwritten.\n\nverbose : int, default=0\n Controls output verbosity.\n\nreturn_path : bool, default=True\n If ``return_path==True`` returns the entire path, else returns only the\n last point of the path.\n\nreturn_n_iter : bool, default=False\n Whether to return the number of iterations.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0.\n This option is only allowed with method 'lasso'. Note that the model\n coefficients will not converge to the ordinary-least-squares solution\n for small values of alpha. Only coefficients up to the smallest alpha\n value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\n the stepwise Lars-Lasso algorithm are typically in congruence with the\n solution of the coordinate descent lasso_path function.\n\nReturns\n-------\nalphas : array-like of shape (n_alphas + 1,)\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller.\n\nactive : array-like of shape (n_alphas,)\n Indices of active variables at the end of the path.\n\ncoefs : array-like of shape (n_features, n_alphas + 1)\n Coefficients along the path\n\nn_iter : int\n Number of iterations run. 
Returned only if return_n_iter is set\n to True.\n\nSee Also\n--------\nlasso_path\nLassoLars\nLars\nLassoLarsCV\nLarsCV\nsklearn.decomposition.sparse_encode\n\nReferences\n----------\n.. [1] \"Least Angle Regression\", Efron et al.\n http://statweb.stanford.edu/~tibs/ftp/lars.pdf\n\n.. [2] `Wikipedia entry on the Least-angle regression\n `_\n\n.. [3] `Wikipedia entry on the Lasso\n `_" - }, - { - "name": "_check_copy_and_writeable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_lars_path_residues", - "decorators": [], - "parameters": [ - { - "name": "X_train", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to fit the LARS on" - }, - { - "name": "y_train", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to fit LARS on" - }, - { - "name": "X_test", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to compute the residues on" - }, - { - "name": "y_test", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to compute the residues on" - }, - { - "name": "Gram", - "type": "Optional[Union[Literal['auto'], ArrayLike]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram matrix is precomputed from the given X, if there are more samples than features" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether X_train, X_test, y_train and y_test should be copied; if False, they may be overwritten." 
- }, - { - "name": "method", - "type": "Literal['lar', 'lasso']", - "hasDefault": true, - "default": "'lar'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the returned model. Select ``'lar'`` for Least Angle Regression, ``'lasso'`` for the Lasso." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Sets the amount of verbosity" - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Restrict coefficients to be >= 0. Be aware that you might want to remove fit_intercept which is set True by default. See reservations for using this option in combination with method 'lasso' for expected small values of alpha in the doc of LassoLarsCV and LassoLarsIC." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "500", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations to perform." 
- }, - { - "name": "eps", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the residues on left-out data for a full LARS path\n\nParameters\n-----------\nX_train : array-like of shape (n_samples, n_features)\n The data to fit the LARS on\n\ny_train : array-like of shape (n_samples,)\n The target variable to fit LARS on\n\nX_test : array-like of shape (n_samples, n_features)\n The data to compute the residues on\n\ny_test : array-like of shape (n_samples,)\n The target variable to compute the residues on\n\nGram : None, 'auto' or array-like of shape (n_features, n_features), default=None\n Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram\n matrix is precomputed from the given X, if there are more samples\n than features\n\ncopy : bool, default=True\n Whether X_train, X_test, y_train and y_test should be copied;\n if False, they may be overwritten.\n\nmethod : {'lar' , 'lasso'}, default='lar'\n Specifies the returned model. Select ``'lar'`` for Least Angle\n Regression, ``'lasso'`` for the Lasso.\n\nverbose : bool or int, default=False\n Sets the amount of verbosity\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\npositive : bool, default=False\n Restrict coefficients to be >= 0. 
Be aware that you might want to\n remove fit_intercept which is set True by default.\n See reservations for using this option in combination with method\n 'lasso' for expected small values of alpha in the doc of LassoLarsCV\n and LassoLarsIC.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\nReturns\n--------\nalphas : array-like of shape (n_alphas,)\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter`` or ``n_features``, whichever\n is smaller.\n\nactive : list\n Indices of active variables at the end of the path.\n\ncoefs : array-like of shape (n_features, n_alphas)\n Coefficients along the path\n\nresidues : array-like of shape (n_alphas, n_samples)\n Residues of the prediction on the test data" - } - ] - }, - { - "name": "sklearn.linear_model._logistic", - "imports": [ - "import numbers", - "import warnings", - "import numpy as np", - "from scipy import optimize", - "from scipy import sparse", - "from scipy.special import expit", - "from scipy.special import logsumexp", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "from _base import LinearClassifierMixin", - "from _base import SparseCoefMixin", - "from _base import BaseEstimator", - "from _sag import 
sag_solver", - "from preprocessing import LabelEncoder", - "from preprocessing import LabelBinarizer", - "from svm._base import _fit_liblinear", - "from utils import check_array", - "from utils import check_consistent_length", - "from utils import compute_class_weight", - "from utils import check_random_state", - "from utils.extmath import log_logistic", - "from utils.extmath import safe_sparse_dot", - "from utils.extmath import softmax", - "from utils.extmath import squared_norm", - "from utils.extmath import row_norms", - "from utils.optimize import _newton_cg", - "from utils.optimize import _check_optimize_result", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import check_classification_targets", - "from utils.fixes import _joblib_parallel_args", - "from utils.fixes import delayed", - "from model_selection import check_cv", - "from metrics import get_scorer" - ], - "classes": [ - { - "name": "LogisticRegression", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "penalty", - "type": "Literal['l1', 'l2', 'elasticnet', 'none']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "Used to specify the norm used in the penalization. The 'newton-cg', 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is only supported by the 'saga' solver. If 'none' (not supported by the liblinear solver), no regularization is applied. .. versionadded:: 0.19 l1 penalty with SAGA solver (allowing 'multinomial' + L1)" - }, - { - "name": "dual", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Dual or primal formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. 
Prefer dual=False when n_samples > n_features." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criteria." - }, - { - "name": "C", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function." - }, - { - "name": "intercept_scaling", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Useful only when the solver 'liblinear' is used and self.fit_intercept is set to True. In this case, x becomes [x, self.intercept_scaling], i.e. a \"synthetic\" feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes ``intercept_scaling * synthetic_feature_weight``. Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``. 
Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified. .. versionadded:: 0.17 *class_weight='balanced'*" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the data. See :term:`Glossary ` for details." - }, - { - "name": "solver", - "type": "Literal['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Algorithm to use in the optimization problem. - For small datasets, 'liblinear' is a good choice, whereas 'sag' and 'saga' are faster for large ones. - For multiclass problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs' handle multinomial loss; 'liblinear' is limited to one-versus-rest schemes. - 'newton-cg', 'lbfgs', 'sag' and 'saga' handle L2 or no penalty - 'liblinear' and 'saga' also handle L1 penalty - 'saga' also supports 'elasticnet' penalty - 'liblinear' does not support setting ``penalty='none'`` Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. You can preprocess the data with a scaler from sklearn.preprocessing. .. versionadded:: 0.17 Stochastic Average Gradient descent solver. .. versionadded:: 0.19 SAGA solver. .. versionchanged:: 0.22 The default solver changed from 'liblinear' to 'lbfgs' in 0.22." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations taken for the solvers to converge." 
- }, - { - "name": "multi_class", - "type": "Literal['auto', 'ovr', 'multinomial']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If the option chosen is 'ovr', then a binary problem is fit for each label. For 'multinomial' the loss minimised is the multinomial loss fit across the entire probability distribution, *even when the data is binary*. 'multinomial' is unavailable when solver='liblinear'. 'auto' selects 'ovr' if the data is binary, or if solver='liblinear', and otherwise selects 'multinomial'. .. versionadded:: 0.18 Stochastic Average Gradient descent solver for 'multinomial' case. .. versionchanged:: 0.22 Default changed from 'ovr' to 'auto' in 0.22." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "For the liblinear and lbfgs solvers set verbose to any positive number for verbosity." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. Useless for liblinear solver. See :term:`the Glossary `. .. versionadded:: 0.17 *warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPU cores used when parallelizing over classes if multi_class='ovr'\". This parameter is ignored when the ``solver`` is set to 'liblinear' regardless of whether 'multi_class' is specified or not. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector relative to X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight. .. versionadded:: 0.17 *sample_weight* support to LogisticRegression." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X.\n\nsample_weight : array-like of shape (n_samples,) default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\n .. 
versionadded:: 0.17\n *sample_weight* support to LogisticRegression.\n\nReturns\n-------\nself\n Fitted estimator.\n\nNotes\n-----\nThe SAGA solver supports both float64 and float32 bit arrays." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Vector to be scored, where `n_samples` is the number of samples and `n_features` is the number of features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Probability estimates.\n\nThe returned estimates for all classes are ordered by the\nlabel of classes.\n\nFor a multi_class problem, if multi_class is set to be \"multinomial\"\nthe softmax function is used to find the predicted probability of\neach class.\nElse use a one-vs-rest approach, i.e calculate the probability\nof each class assuming it to be positive using the logistic function.\nand normalize these values across all the classes.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Vector to be scored, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\nReturns\n-------\nT : array-like of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in the model,\n where classes are ordered as they are in ``self.classes_``." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Vector to be scored, where `n_samples` is the number of samples and `n_features` is the number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict logarithm of probability estimates.\n\nThe returned estimates for all classes are ordered by the\nlabel of classes.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Vector to be scored, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\nReturns\n-------\nT : array-like of shape (n_samples, n_classes)\n Returns the log-probability of the sample for each class in the\n model, where classes are ordered as they are in ``self.classes_``." - } - ], - "docstring": "Logistic Regression (aka logit, MaxEnt) classifier.\n\nIn the multiclass case, the training algorithm uses the one-vs-rest (OvR)\nscheme if the 'multi_class' option is set to 'ovr', and uses the\ncross-entropy loss if the 'multi_class' option is set to 'multinomial'.\n(Currently the 'multinomial' option is supported only by the 'lbfgs',\n'sag', 'saga' and 'newton-cg' solvers.)\n\nThis class implements regularized logistic regression using the\n'liblinear' library, 'newton-cg', 'sag', 'saga' and 'lbfgs' solvers. **Note\nthat regularization is applied by default**. It can handle both dense\nand sparse input. Use C-ordered arrays or CSR matrices containing 64-bit\nfloats for optimal performance; any other input format will be converted\n(and copied).\n\nThe 'newton-cg', 'sag', and 'lbfgs' solvers support only L2 regularization\nwith primal formulation, or no regularization. The 'liblinear' solver\nsupports both L1 and L2 regularization, with a dual formulation only for\nthe L2 penalty. The Elastic-Net regularization is only supported by the\n'saga' solver.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npenalty : {'l1', 'l2', 'elasticnet', 'none'}, default='l2'\n Used to specify the norm used in the penalization. The 'newton-cg',\n 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n only supported by the 'saga' solver. 
If 'none' (not supported by the\n liblinear solver), no regularization is applied.\n\n .. versionadded:: 0.19\n l1 penalty with SAGA solver (allowing 'multinomial' + L1)\n\ndual : bool, default=False\n Dual or primal formulation. Dual formulation is only implemented for\n l2 penalty with liblinear solver. Prefer dual=False when\n n_samples > n_features.\n\ntol : float, default=1e-4\n Tolerance for stopping criteria.\n\nC : float, default=1.0\n Inverse of regularization strength; must be a positive float.\n Like in support vector machines, smaller values specify stronger\n regularization.\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the decision function.\n\nintercept_scaling : float, default=1\n Useful only when the solver 'liblinear' is used\n and self.fit_intercept is set to True. In this case, x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equal to\n intercept_scaling is appended to the instance vector.\n The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\n .. 
versionadded:: 0.17\n *class_weight='balanced'*\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n data. See :term:`Glossary ` for details.\n\nsolver : {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, default='lbfgs'\n\n Algorithm to use in the optimization problem.\n\n - For small datasets, 'liblinear' is a good choice, whereas 'sag' and\n 'saga' are faster for large ones.\n - For multiclass problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs'\n handle multinomial loss; 'liblinear' is limited to one-versus-rest\n schemes.\n - 'newton-cg', 'lbfgs', 'sag' and 'saga' handle L2 or no penalty\n - 'liblinear' and 'saga' also handle L1 penalty\n - 'saga' also supports 'elasticnet' penalty\n - 'liblinear' does not support setting ``penalty='none'``\n\n Note that 'sag' and 'saga' fast convergence is only guaranteed on\n features with approximately the same scale. You can\n preprocess the data with a scaler from sklearn.preprocessing.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n .. versionchanged:: 0.22\n The default solver changed from 'liblinear' to 'lbfgs' in 0.22.\n\nmax_iter : int, default=100\n Maximum number of iterations taken for the solvers to converge.\n\nmulti_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n If the option chosen is 'ovr', then a binary problem is fit for each\n label. For 'multinomial' the loss minimised is the multinomial loss fit\n across the entire probability distribution, *even when the data is\n binary*. 'multinomial' is unavailable when solver='liblinear'.\n 'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n and otherwise selects 'multinomial'.\n\n .. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n .. 
versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22.\n\nverbose : int, default=0\n For the liblinear and lbfgs solvers set verbose to any positive\n number for verbosity.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n Useless for liblinear solver. See :term:`the Glossary `.\n\n .. versionadded:: 0.17\n *warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers.\n\nn_jobs : int, default=None\n Number of CPU cores used when parallelizing over classes if\n multi_class='ovr'\". This parameter is ignored when the ``solver`` is\n set to 'liblinear' regardless of whether 'multi_class' is specified or\n not. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors.\n See :term:`Glossary ` for more details.\n\nl1_ratio : float, default=None\n The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n combination of L1 and L2.\n\nAttributes\n----------\n\nclasses_ : ndarray of shape (n_classes, )\n A list of class labels known to the classifier.\n\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n Coefficient of the features in the decision function.\n\n `coef_` is of shape (1, n_features) when the given problem is binary.\n In particular, when `multi_class='multinomial'`, `coef_` corresponds\n to outcome 1 (True) and `-coef_` corresponds to outcome 0 (False).\n\nintercept_ : ndarray of shape (1,) or (n_classes,)\n Intercept (a.k.a. 
bias) added to the decision function.\n\n If `fit_intercept` is set to False, the intercept is set to zero.\n `intercept_` is of shape (1,) when the given problem is binary.\n In particular, when `multi_class='multinomial'`, `intercept_`\n corresponds to outcome 1 (True) and `-intercept_` corresponds to\n outcome 0 (False).\n\nn_iter_ : ndarray of shape (n_classes,) or (1, )\n Actual number of iterations for all classes. If binary or multinomial,\n it returns only 1 element. For liblinear solver, only the maximum\n number of iteration across all classes is given.\n\n .. versionchanged:: 0.20\n\n In SciPy <= 1.0.0 the number of lbfgs iterations may exceed\n ``max_iter``. ``n_iter_`` will now report at most ``max_iter``.\n\nSee Also\n--------\nSGDClassifier : Incrementally trained logistic regression (when given\n the parameter ``loss=\"log\"``).\nLogisticRegressionCV : Logistic regression with built-in cross validation.\n\nNotes\n-----\nThe underlying C implementation uses a random number generator to\nselect features when fitting the model. It is thus not uncommon,\nto have slightly different results for the same input data. If\nthat happens, try with a smaller tol parameter.\n\nPredict output may not match that of standalone liblinear in certain\ncases. See :ref:`differences from liblinear `\nin the narrative documentation.\n\nReferences\n----------\n\nL-BFGS-B -- Software for Large-scale Bound-constrained Optimization\n Ciyou Zhu, Richard Byrd, Jorge Nocedal and Jose Luis Morales.\n http://users.iems.northwestern.edu/~nocedal/lbfgsb.html\n\nLIBLINEAR -- A Library for Large Linear Classification\n https://www.csie.ntu.edu.tw/~cjlin/liblinear/\n\nSAG -- Mark Schmidt, Nicolas Le Roux, and Francis Bach\n Minimizing Finite Sums with the Stochastic Average Gradient\n https://hal.inria.fr/hal-00860051/document\n\nSAGA -- Defazio, A., Bach F. & Lacoste-Julien S. 
(2014).\n SAGA: A Fast Incremental Gradient Method With Support\n for Non-Strongly Convex Composite Objectives\n https://arxiv.org/abs/1407.0202\n\nHsiang-Fu Yu, Fang-Lan Huang, Chih-Jen Lin (2011). Dual coordinate descent\n methods for logistic regression and maximum entropy models.\n Machine Learning 85(1-2):41-75.\n https://www.csie.ntu.edu.tw/~cjlin/papers/maxent_dual.pdf\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.linear_model import LogisticRegression\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = LogisticRegression(random_state=0).fit(X, y)\n>>> clf.predict(X[:2, :])\narray([0, 0])\n>>> clf.predict_proba(X[:2, :])\narray([[9.8...e-01, 1.8...e-02, 1.4...e-08],\n [9.7...e-01, 2.8...e-02, ...e-08]])\n>>> clf.score(X, y)\n0.97..." - }, - { - "name": "LogisticRegressionCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "Cs", - "type": "Union[List, int]", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Each of the values in Cs describes the inverse of regularization strength. If Cs is as an int, then a grid of Cs values are chosen in a logarithmic scale between 1e-4 and 1e4. Like in support vector machines, smaller values specify stronger regularization." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The default cross-validation generator used is Stratified K-Folds. If an integer is provided, then it is the number of folds used. See the module :mod:`sklearn.model_selection` module for the list of possible cross-validation objects. .. 
versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "dual", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Dual or primal formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. Prefer dual=False when n_samples > n_features." - }, - { - "name": "penalty", - "type": "Literal['l1', 'l2', 'elasticnet']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "Used to specify the norm used in the penalization. The 'newton-cg', 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is only supported by the 'saga' solver." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. For a list of scoring functions that can be used, look at :mod:`sklearn.metrics`. The default scoring option used is 'accuracy'." - }, - { - "name": "solver", - "type": "Literal['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Algorithm to use in the optimization problem. - For small datasets, 'liblinear' is a good choice, whereas 'sag' and 'saga' are faster for large ones. - For multiclass problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs' handle multinomial loss; 'liblinear' is limited to one-versus-rest schemes. - 'newton-cg', 'lbfgs' and 'sag' only handle L2 penalty, whereas 'liblinear' and 'saga' handle L1 penalty. - 'liblinear' might be slower in LogisticRegressionCV because it does not handle warm-starting. Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. 
You can preprocess the data with a scaler from sklearn.preprocessing. .. versionadded:: 0.17 Stochastic Average Gradient descent solver. .. versionadded:: 0.19 SAGA solver." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criteria." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations of the optimization algorithm." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified. .. versionadded:: 0.17 class_weight == 'balanced'" - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPU cores used during the cross-validation loop. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "For the 'liblinear', 'sag' and 'lbfgs' solvers set verbose to any positive number for verbosity." 
- }, - { - "name": "refit", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If set to True, the scores are averaged across all folds, and the coefs and the C that corresponds to the best score is taken, and a final refit is done using these parameters. Otherwise the coefs, intercepts and C that correspond to the best scores across folds are averaged." - }, - { - "name": "intercept_scaling", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Useful only when the solver 'liblinear' is used and self.fit_intercept is set to True. In this case, x becomes [x, self.intercept_scaling], i.e. a \"synthetic\" feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes ``intercept_scaling * synthetic_feature_weight``. Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased." - }, - { - "name": "multi_class", - "type": "Literal['ovr', 'multinomial']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If the option chosen is 'ovr', then a binary problem is fit for each label. For 'multinomial' the loss minimised is the multinomial loss fit across the entire probability distribution, *even when the data is binary*. 'multinomial' is unavailable when solver='liblinear'. 'auto' selects 'ovr' if the data is binary, or if solver='liblinear', and otherwise selects 'multinomial'. .. versionadded:: 0.18 Stochastic Average Gradient descent solver for 'multinomial' case. .. versionchanged:: 0.22 Default changed from 'ovr' to 'auto' in 0.22." 
- }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when `solver='sag'`, 'saga' or 'liblinear' to shuffle the data. Note that this only applies to the solver and not the cross-validation generator. See :term:`Glossary ` for details." - }, - { - "name": "l1_ratios", - "type": "List[float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The list of Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only used if ``penalty='elasticnet'``. A value of 0 is equivalent to using ``penalty='l2'``, while 1 is equivalent to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector relative to X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X.\n\nsample_weight : array-like of shape (n_samples,) default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nReturns\n-------\nself : object" - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels for X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the score using the `scoring` option on the given\ntest data and labels.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples.\n\ny : array-like of shape (n_samples,)\n True labels for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Score of self.predict(X) wrt. y." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Logistic Regression CV (aka logit, MaxEnt) classifier.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThis class implements logistic regression using liblinear, newton-cg, sag\nof lbfgs optimizer. The newton-cg, sag and lbfgs solvers support only L2\nregularization with primal formulation. The liblinear solver supports both\nL1 and L2 regularization, with a dual formulation only for the L2 penalty.\nElastic-Net penalty is only supported by the saga solver.\n\nFor the grid of `Cs` values and `l1_ratios` values, the best hyperparameter\nis selected by the cross-validator\n:class:`~sklearn.model_selection.StratifiedKFold`, but it can be changed\nusing the :term:`cv` parameter. The 'newton-cg', 'sag', 'saga' and 'lbfgs'\nsolvers can warm-start the coefficients (see :term:`Glossary`).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nCs : int or list of floats, default=10\n Each of the values in Cs describes the inverse of regularization\n strength. If Cs is as an int, then a grid of Cs values are chosen\n in a logarithmic scale between 1e-4 and 1e4.\n Like in support vector machines, smaller values specify stronger\n regularization.\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the decision function.\n\ncv : int or cross-validation generator, default=None\n The default cross-validation generator used is Stratified K-Folds.\n If an integer is provided, then it is the number of folds used.\n See the module :mod:`sklearn.model_selection` module for the\n list of possible cross-validation objects.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\ndual : bool, default=False\n Dual or primal formulation. 
Dual formulation is only implemented for\n l2 penalty with liblinear solver. Prefer dual=False when\n n_samples > n_features.\n\npenalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n Used to specify the norm used in the penalization. The 'newton-cg',\n 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n only supported by the 'saga' solver.\n\nscoring : str or callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``. For a list of scoring functions\n that can be used, look at :mod:`sklearn.metrics`. The\n default scoring option used is 'accuracy'.\n\nsolver : {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, default='lbfgs'\n\n Algorithm to use in the optimization problem.\n\n - For small datasets, 'liblinear' is a good choice, whereas 'sag' and\n 'saga' are faster for large ones.\n - For multiclass problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs'\n handle multinomial loss; 'liblinear' is limited to one-versus-rest\n schemes.\n - 'newton-cg', 'lbfgs' and 'sag' only handle L2 penalty, whereas\n 'liblinear' and 'saga' handle L1 penalty.\n - 'liblinear' might be slower in LogisticRegressionCV because it does\n not handle warm-starting.\n\n Note that 'sag' and 'saga' fast convergence is only guaranteed on\n features with approximately the same scale. You can preprocess the data\n with a scaler from sklearn.preprocessing.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. 
versionadded:: 0.19\n SAGA solver.\n\ntol : float, default=1e-4\n Tolerance for stopping criteria.\n\nmax_iter : int, default=100\n Maximum number of iterations of the optimization algorithm.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\n .. versionadded:: 0.17\n class_weight == 'balanced'\n\nn_jobs : int, default=None\n Number of CPU cores used during the cross-validation loop.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n For the 'liblinear', 'sag' and 'lbfgs' solvers set verbose to any\n positive number for verbosity.\n\nrefit : bool, default=True\n If set to True, the scores are averaged across all folds, and the\n coefs and the C that corresponds to the best score is taken, and a\n final refit is done using these parameters.\n Otherwise the coefs, intercepts and C that correspond to the\n best scores across folds are averaged.\n\nintercept_scaling : float, default=1\n Useful only when the solver 'liblinear' is used\n and self.fit_intercept is set to True. In this case, x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equal to\n intercept_scaling is appended to the instance vector.\n The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n Note! 
the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\nmulti_class : {'auto, 'ovr', 'multinomial'}, default='auto'\n If the option chosen is 'ovr', then a binary problem is fit for each\n label. For 'multinomial' the loss minimised is the multinomial loss fit\n across the entire probability distribution, *even when the data is\n binary*. 'multinomial' is unavailable when solver='liblinear'.\n 'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n and otherwise selects 'multinomial'.\n\n .. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n .. versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22.\n\nrandom_state : int, RandomState instance, default=None\n Used when `solver='sag'`, 'saga' or 'liblinear' to shuffle the data.\n Note that this only applies to the solver and not the cross-validation\n generator. See :term:`Glossary ` for details.\n\nl1_ratios : list of float, default=None\n The list of Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``.\n Only used if ``penalty='elasticnet'``. A value of 0 is equivalent to\n using ``penalty='l2'``, while 1 is equivalent to using\n ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a combination\n of L1 and L2.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes, )\n A list of class labels known to the classifier.\n\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n Coefficient of the features in the decision function.\n\n `coef_` is of shape (1, n_features) when the given problem\n is binary.\n\nintercept_ : ndarray of shape (1,) or (n_classes,)\n Intercept (a.k.a. 
bias) added to the decision function.\n\n If `fit_intercept` is set to False, the intercept is set to zero.\n `intercept_` is of shape(1,) when the problem is binary.\n\nCs_ : ndarray of shape (n_cs)\n Array of C i.e. inverse of regularization parameter values used\n for cross-validation.\n\nl1_ratios_ : ndarray of shape (n_l1_ratios)\n Array of l1_ratios used for cross-validation. If no l1_ratio is used\n (i.e. penalty is not 'elasticnet'), this is set to ``[None]``\n\ncoefs_paths_ : ndarray of shape (n_folds, n_cs, n_features) or (n_folds, n_cs, n_features + 1)\n dict with classes as the keys, and the path of coefficients obtained\n during cross-validating across each fold and then across each Cs\n after doing an OvR for the corresponding class as values.\n If the 'multi_class' option is set to 'multinomial', then\n the coefs_paths are the coefficients corresponding to each class.\n Each dict value has shape ``(n_folds, n_cs, n_features)`` or\n ``(n_folds, n_cs, n_features + 1)`` depending on whether the\n intercept is fit or not. If ``penalty='elasticnet'``, the shape is\n ``(n_folds, n_cs, n_l1_ratios_, n_features)`` or\n ``(n_folds, n_cs, n_l1_ratios_, n_features + 1)``.\n\nscores_ : dict\n dict with classes as the keys, and the values as the\n grid of scores obtained during cross-validating each fold, after doing\n an OvR for the corresponding class. If the 'multi_class' option\n given is 'multinomial' then the same scores are repeated across\n all classes, since this is the multinomial class. Each dict value\n has shape ``(n_folds, n_cs`` or ``(n_folds, n_cs, n_l1_ratios)`` if\n ``penalty='elasticnet'``.\n\nC_ : ndarray of shape (n_classes,) or (n_classes - 1,)\n Array of C that maps to the best scores across every class. 
If refit is\n set to False, then for each class, the best C is the average of the\n C's that correspond to the best scores for each fold.\n `C_` is of shape(n_classes,) when the problem is binary.\n\nl1_ratio_ : ndarray of shape (n_classes,) or (n_classes - 1,)\n Array of l1_ratio that maps to the best scores across every class. If\n refit is set to False, then for each class, the best l1_ratio is the\n average of the l1_ratio's that correspond to the best scores for each\n fold. `l1_ratio_` is of shape(n_classes,) when the problem is binary.\n\nn_iter_ : ndarray of shape (n_classes, n_folds, n_cs) or (1, n_folds, n_cs)\n Actual number of iterations for all classes, folds and Cs.\n In the binary or multinomial cases, the first dimension is equal to 1.\n If ``penalty='elasticnet'``, the shape is ``(n_classes, n_folds,\n n_cs, n_l1_ratios)`` or ``(1, n_folds, n_cs, n_l1_ratios)``.\n\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.linear_model import LogisticRegressionCV\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = LogisticRegressionCV(cv=5, random_state=0).fit(X, y)\n>>> clf.predict(X[:2, :])\narray([0, 0])\n>>> clf.predict_proba(X[:2, :]).shape\n(2, 3)\n>>> clf.score(X, y)\n0.98...\n\nSee Also\n--------\nLogisticRegression" - } - ], - "functions": [ - { - "name": "_intercept_dot", - "decorators": [], - "parameters": [ - { - "name": "w", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Coefficient vector." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of labels." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes y * np.dot(X, w).\n\nIt takes into consideration if the intercept should be fit or not.\n\nParameters\n----------\nw : ndarray of shape (n_features,) or (n_features + 1,)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Array of labels.\n\nReturns\n-------\nw : ndarray of shape (n_features,)\n Coefficient vector without the intercept weight (w[-1]) if the\n intercept should be fit. Unchanged otherwise.\n\nc : float\n The intercept.\n\nyz : float\n y * np.dot(X, w)." - }, - { - "name": "_logistic_loss_and_grad", - "decorators": [], - "parameters": [ - { - "name": "w", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Coefficient vector." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of labels." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter. alpha is equal to 1 / C." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the logistic loss and gradient.\n\nParameters\n----------\nw : ndarray of shape (n_features,) or (n_features + 1,)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Array of labels.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nReturns\n-------\nout : float\n Logistic loss.\n\ngrad : ndarray of shape (n_features,) or (n_features + 1,)\n Logistic gradient." - }, - { - "name": "_logistic_loss", - "decorators": [], - "parameters": [ - { - "name": "w", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Coefficient vector." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of labels." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter. alpha is equal to 1 / C." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the logistic loss.\n\nParameters\n----------\nw : ndarray of shape (n_features,) or (n_features + 1,)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Array of labels.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,) default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nReturns\n-------\nout : float\n Logistic loss." - }, - { - "name": "_logistic_grad_hess", - "decorators": [], - "parameters": [ - { - "name": "w", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Coefficient vector." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of labels." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter. alpha is equal to 1 / C." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the gradient and the Hessian, in the case of a logistic loss.\n\nParameters\n----------\nw : ndarray of shape (n_features,) or (n_features + 1,)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Array of labels.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,) default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nReturns\n-------\ngrad : ndarray of shape (n_features,) or (n_features + 1,)\n Logistic gradient.\n\nHs : callable\n Function that takes the gradient as a parameter and returns the\n matrix product of the Hessian and gradient." - }, - { - "name": "_multinomial_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes multinomial loss and class probabilities.\n\nParameters\n----------\nw : ndarray of shape (n_classes * n_features,) or\n (n_classes * (n_features + 1),)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\nY : ndarray of shape (n_samples, n_classes)\n Transformed labels according to the output of LabelBinarizer.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,)\n Array of weights that are assigned to individual samples.\n\nReturns\n-------\nloss : float\n Multinomial loss.\n\np : ndarray of shape (n_samples, n_classes)\n Estimated class probabilities.\n\nw : ndarray of shape (n_classes, n_features)\n Reshaped param vector excluding intercept terms.\n\nReference\n---------\nBishop, C. M. (2006). Pattern recognition and machine learning.\nSpringer. 
(Chapter 4.3.4)" - }, - { - "name": "_multinomial_loss_grad", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the multinomial loss, gradient and class probabilities.\n\nParameters\n----------\nw : ndarray of shape (n_classes * n_features,) or\n (n_classes * (n_features + 1),)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\nY : ndarray of shape (n_samples, n_classes)\n Transformed labels according to the output of LabelBinarizer.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,)\n Array of weights that are assigned to individual samples.\n\nReturns\n-------\nloss : float\n Multinomial loss.\n\ngrad : ndarray of shape (n_classes * n_features,) or (n_classes * (n_features + 1),)\n Ravelled gradient of the multinomial loss.\n\np : ndarray of shape (n_samples, n_classes)\n Estimated class probabilities\n\nReference\n---------\nBishop, C. M. (2006). Pattern recognition and machine learning.\nSpringer. (Chapter 4.3.4)" - }, - { - "name": "_multinomial_grad_hess", - "decorators": [], - "parameters": [ - { - "name": "w", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "(n_classes * (n_features + 1),) Coefficient vector." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Transformed labels according to the output of LabelBinarizer." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter. alpha is equal to 1 / C." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the gradient and the Hessian, in the case of a multinomial loss.\n\nParameters\n----------\nw : ndarray of shape (n_classes * n_features,) or\n (n_classes * (n_features + 1),)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\nY : ndarray of shape (n_samples, n_classes)\n Transformed labels according to the output of LabelBinarizer.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,)\n Array of weights that are assigned to individual samples.\n\nReturns\n-------\ngrad : ndarray of shape (n_classes * n_features,) or (n_classes * (n_features + 1),)\n Ravelled gradient of the multinomial loss.\n\nhessp : callable\n Function that takes in a vector input of shape (n_classes * n_features)\n or (n_classes * (n_features + 1)) and returns matrix-vector product\n with hessian.\n\nReferences\n----------\nBarak A. Pearlmutter (1993). Fast Exact Multiplication by the Hessian.\n http://www.bcl.hamilton.ie/~barak/papers/nc-hessian.pdf" - }, - { - "name": "_check_solver", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_multi_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_logistic_regression_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, target values." - }, - { - "name": "pos_class", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class with respect to which we perform a one-vs-all fit. If None, then it is assumed that the given problem is binary." - }, - { - "name": "Cs", - "type": "Union[ArrayLike, int]", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "List of values for the regularization parameter or integer specifying the number of regularization parameters that should be used. In this case, the parameters will be chosen in a logarithmic scale between 1e-4 and 1e4." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to fit an intercept for the model. In this case the shape of the returned array is (n_cs, n_features + 1)." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for the solver." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion. For the newton-cg and lbfgs solvers, the iteration will stop when ``max{|g_i | i = 1, ..., n} <= tol`` where ``g_i`` is the i-th component of the gradient." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "For the liblinear and lbfgs solvers set verbose to any positive number for verbosity." 
- }, - { - "name": "solver", - "type": "Literal['lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Numerical solver to use." - }, - { - "name": "coef", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initialization value for coefficients of logistic regression. Useless for liblinear solver." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified." - }, - { - "name": "dual", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Dual or primal formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. Prefer dual=False when n_samples > n_features." - }, - { - "name": "penalty", - "type": "Literal['l1', 'l2', 'elasticnet']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "Used to specify the norm used in the penalization. The 'newton-cg', 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is only supported by the 'saga' solver." - }, - { - "name": "intercept_scaling", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Useful only when the solver 'liblinear' is used and self.fit_intercept is set to True. 
In this case, x becomes [x, self.intercept_scaling], i.e. a \"synthetic\" feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes ``intercept_scaling * synthetic_feature_weight``. Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased." - }, - { - "name": "multi_class", - "type": "Literal['ovr', 'multinomial', 'auto']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If the option chosen is 'ovr', then a binary problem is fit for each label. For 'multinomial' the loss minimised is the multinomial loss fit across the entire probability distribution, *even when the data is binary*. 'multinomial' is unavailable when solver='liblinear'. 'auto' selects 'ovr' if the data is binary, or if solver='liblinear', and otherwise selects 'multinomial'. .. versionadded:: 0.18 Stochastic Average Gradient descent solver for 'multinomial' case. .. versionchanged:: 0.22 Default changed from 'ovr' to 'auto' in 0.22." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the data. See :term:`Glossary ` for details." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, the input arrays X and y will not be checked." - }, - { - "name": "max_squared_sum", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum squared sum of X over samples. Used only in SAG solver. If None, it will be computed, going through all the samples. 
The value should be precomputed to speed up cross validation." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute a Logistic Regression model for a list of regularization\nparameters.\n\nThis is an implementation that uses the result of the previous model\nto speed up computations along the set of solutions, making it faster\nthan sequentially calling LogisticRegression for the different parameters.\nNote that there will be no speedup with liblinear solver, since it does\nnot handle warm-starting.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Input data, target values.\n\npos_class : int, default=None\n The class with respect to which we perform a one-vs-all fit.\n If None, then it is assumed that the given problem is binary.\n\nCs : int or array-like of shape (n_cs,), default=10\n List of values for the regularization parameter or integer specifying\n the number of regularization parameters that should be used. 
In this\n case, the parameters will be chosen in a logarithmic scale between\n 1e-4 and 1e4.\n\nfit_intercept : bool, default=True\n Whether to fit an intercept for the model. In this case the shape of\n the returned array is (n_cs, n_features + 1).\n\nmax_iter : int, default=100\n Maximum number of iterations for the solver.\n\ntol : float, default=1e-4\n Stopping criterion. For the newton-cg and lbfgs solvers, the iteration\n will stop when ``max{|g_i | i = 1, ..., n} <= tol``\n where ``g_i`` is the i-th component of the gradient.\n\nverbose : int, default=0\n For the liblinear and lbfgs solvers set verbose to any positive\n number for verbosity.\n\nsolver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'}, default='lbfgs'\n Numerical solver to use.\n\ncoef : array-like of shape (n_features,), default=None\n Initialization value for coefficients of logistic regression.\n Useless for liblinear solver.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\ndual : bool, default=False\n Dual or primal formulation. Dual formulation is only implemented for\n l2 penalty with liblinear solver. Prefer dual=False when\n n_samples > n_features.\n\npenalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n Used to specify the norm used in the penalization. The 'newton-cg',\n 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n only supported by the 'saga' solver.\n\nintercept_scaling : float, default=1.\n Useful only when the solver 'liblinear' is used\n and self.fit_intercept is set to True. 
In this case, x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equal to\n intercept_scaling is appended to the instance vector.\n The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\nmulti_class : {'ovr', 'multinomial', 'auto'}, default='auto'\n If the option chosen is 'ovr', then a binary problem is fit for each\n label. For 'multinomial' the loss minimised is the multinomial loss fit\n across the entire probability distribution, *even when the data is\n binary*. 'multinomial' is unavailable when solver='liblinear'.\n 'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n and otherwise selects 'multinomial'.\n\n .. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n .. versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22.\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n data. See :term:`Glossary ` for details.\n\ncheck_input : bool, default=True\n If False, the input arrays X and y will not be checked.\n\nmax_squared_sum : float, default=None\n Maximum squared sum of X over samples. Used only in SAG solver.\n If None, it will be computed, going through all the samples.\n The value should be precomputed to speed up cross validation.\n\nsample_weight : array-like of shape(n_samples,), default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nl1_ratio : float, default=None\n The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n used if ``penalty='elasticnet'``. 
Setting ``l1_ratio=0`` is equivalent\n to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n combination of L1 and L2.\n\nReturns\n-------\ncoefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)\n List of coefficients for the Logistic Regression model. If\n fit_intercept is set to True then the second dimension will be\n n_features + 1, where the last item represents the intercept. For\n ``multiclass='multinomial'``, the shape is (n_classes, n_cs,\n n_features) or (n_classes, n_cs, n_features + 1).\n\nCs : ndarray\n Grid of Cs used for cross-validation.\n\nn_iter : array of shape (n_cs,)\n Actual number of iteration for each Cs.\n\nNotes\n-----\nYou might get slightly different results with the solver liblinear than\nwith the others since this uses LIBLINEAR which penalizes the intercept.\n\n.. versionchanged:: 0.19\n The \"copy\" parameter was removed." - }, - { - "name": "_log_reg_scoring_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target labels." - }, - { - "name": "train", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The indices of the train set." - }, - { - "name": "test", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The indices of the test set." - }, - { - "name": "pos_class", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class with respect to which we perform a one-vs-all fit. 
If None, then it is assumed that the given problem is binary." - }, - { - "name": "Cs", - "type": "Union[List, int]", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Each of the values in Cs describes the inverse of regularization strength. If Cs is as an int, then a grid of Cs values are chosen in a logarithmic scale between 1e-4 and 1e4. If not provided, then a fixed set of values for Cs are used." - }, - { - "name": "scoring", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. For a list of scoring functions that can be used, look at :mod:`sklearn.metrics`. The default scoring option used is accuracy_score." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If False, then the bias term is set to zero. Else the last term of each coef_ gives us the intercept." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for the solver." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criteria." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. 
The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "For the liblinear and lbfgs solvers set verbose to any positive number for verbosity." - }, - { - "name": "solver", - "type": "Literal['lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Decides which solver to use." - }, - { - "name": "penalty", - "type": "Literal['l1', 'l2', 'elasticnet']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "Used to specify the norm used in the penalization. The 'newton-cg', 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is only supported by the 'saga' solver." - }, - { - "name": "dual", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Dual or primal formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. Prefer dual=False when n_samples > n_features." - }, - { - "name": "intercept_scaling", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Useful only when the solver 'liblinear' is used and self.fit_intercept is set to True. In this case, x becomes [x, self.intercept_scaling], i.e. a \"synthetic\" feature with constant value equals to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic feature weight Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. 
To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased." - }, - { - "name": "multi_class", - "type": "Literal['auto', 'ovr', 'multinomial']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "If the option chosen is 'ovr', then a binary problem is fit for each label. For 'multinomial' the loss minimised is the multinomial loss fit across the entire probability distribution, *even when the data is binary*. 'multinomial' is unavailable when solver='liblinear'." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the data. See :term:`Glossary ` for details." - }, - { - "name": "max_squared_sum", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum squared sum of X over samples. Used only in SAG solver. If None, it will be computed, going through all the samples. The value should be precomputed to speed up cross validation." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes scores across logistic_regression_path\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target labels.\n\ntrain : list of indices\n The indices of the train set.\n\ntest : list of indices\n The indices of the test set.\n\npos_class : int, default=None\n The class with respect to which we perform a one-vs-all fit.\n If None, then it is assumed that the given problem is binary.\n\nCs : int or list of floats, default=10\n Each of the values in Cs describes the inverse of\n regularization strength. If Cs is as an int, then a grid of Cs\n values are chosen in a logarithmic scale between 1e-4 and 1e4.\n If not provided, then a fixed set of values for Cs are used.\n\nscoring : callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``. For a list of scoring functions\n that can be used, look at :mod:`sklearn.metrics`. The\n default scoring option used is accuracy_score.\n\nfit_intercept : bool, default=False\n If False, then the bias term is set to zero. 
Else the last\n term of each coef_ gives us the intercept.\n\nmax_iter : int, default=100\n Maximum number of iterations for the solver.\n\ntol : float, default=1e-4\n Tolerance for stopping criteria.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\nverbose : int, default=0\n For the liblinear and lbfgs solvers set verbose to any positive\n number for verbosity.\n\nsolver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'}, default='lbfgs'\n Decides which solver to use.\n\npenalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n Used to specify the norm used in the penalization. The 'newton-cg',\n 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n only supported by the 'saga' solver.\n\ndual : bool, default=False\n Dual or primal formulation. Dual formulation is only implemented for\n l2 penalty with liblinear solver. Prefer dual=False when\n n_samples > n_features.\n\nintercept_scaling : float, default=1.\n Useful only when the solver 'liblinear' is used\n and self.fit_intercept is set to True. In this case, x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equals to\n intercept_scaling is appended to the instance vector.\n The intercept becomes intercept_scaling * synthetic feature weight\n Note! 
the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\nmulti_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n If the option chosen is 'ovr', then a binary problem is fit for each\n label. For 'multinomial' the loss minimised is the multinomial loss fit\n across the entire probability distribution, *even when the data is\n binary*. 'multinomial' is unavailable when solver='liblinear'.\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n data. See :term:`Glossary ` for details.\n\nmax_squared_sum : float, default=None\n Maximum squared sum of X over samples. Used only in SAG solver.\n If None, it will be computed, going through all the samples.\n The value should be precomputed to speed up cross validation.\n\nsample_weight : array-like of shape(n_samples,), default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nl1_ratio : float, default=None\n The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n combination of L1 and L2.\n\nReturns\n-------\ncoefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)\n List of coefficients for the Logistic Regression model. If\n fit_intercept is set to True then the second dimension will be\n n_features + 1, where the last item represents the intercept.\n\nCs : ndarray\n Grid of Cs used for cross-validation.\n\nscores : ndarray of shape (n_cs,)\n Scores obtained for each Cs.\n\nn_iter : ndarray of shape(n_cs,)\n Actual number of iteration for each Cs." 
- } - ] - }, - { - "name": "sklearn.linear_model._omp", - "imports": [ - "import warnings", - "from math import sqrt", - "import numpy as np", - "from scipy import linalg", - "from scipy.linalg.lapack import get_lapack_funcs", - "from joblib import Parallel", - "from _base import LinearModel", - "from _base import _pre_fit", - "from base import RegressorMixin", - "from base import MultiOutputMixin", - "from utils import as_float_array", - "from utils import check_array", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from model_selection import check_cv" - ], - "classes": [ - { - "name": "OrthogonalMatchingPursuit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_nonzero_coefs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Desired number of non-zero entries in the solution. If None (by default) this value is set to 10% of n_features." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum norm of the residual. If not None, overrides n_nonzero_coefs." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. 
If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "precompute", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a precomputed Gram and Xy matrix to speed up calculations. Improves performance when :term:`n_targets` or :term:`n_samples` is very large. Note that if you already have such matrices, you can pass them directly to the fit method." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. Will be cast to X's dtype if necessary" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to X's dtype if necessary\n\n\nReturns\n-------\nself : object\n returns an instance of self." - } - ], - "docstring": "Orthogonal Matching Pursuit model (OMP).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_nonzero_coefs : int, default=None\n Desired number of non-zero entries in the solution. If None (by\n default) this value is set to 10% of n_features.\n\ntol : float, default=None\n Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. 
If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : 'auto' or bool, default='auto'\n Whether to use a precomputed Gram and Xy matrix to speed up\n calculations. Improves performance when :term:`n_targets` or\n :term:`n_samples` is very large. Note that if you already have such\n matrices, you can pass them directly to the fit method.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the formula).\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : int or array-like\n Number of active features across every target.\n\nn_nonzero_coefs_ : int\n The number of non-zero coefficients in the solution. If\n `n_nonzero_coefs` is None and `tol` is None this value is either set\n to 10% of `n_features` or 1, whichever is greater.\n\nExamples\n--------\n>>> from sklearn.linear_model import OrthogonalMatchingPursuit\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(noise=4, random_state=0)\n>>> reg = OrthogonalMatchingPursuit().fit(X, y)\n>>> reg.score(X, y)\n0.9991...\n>>> reg.predict(X[:1,])\narray([-78.3854...])\n\nNotes\n-----\nOrthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,\nMatching pursuits with time-frequency dictionaries, IEEE Transactions on\nSignal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.\n(http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)\n\nThis implementation is based on Rubinstein, R., Zibulevsky, M. 
and Elad,\nM., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\nMatching Pursuit Technical Report - CS Technion, April 2008.\nhttps://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf\n\nSee Also\n--------\northogonal_mp\northogonal_mp_gram\nlars_path\nLars\nLassoLars\nsklearn.decomposition.sparse_encode\nOrthogonalMatchingPursuitCV" - }, - { - "name": "OrthogonalMatchingPursuitCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the design matrix X must be copied by the algorithm. A false value is only helpful if X is already Fortran-ordered, otherwise a copy is made anyway." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum numbers of iterations to perform, therefore maximum features to include. 10% of ``n_features`` but at least 5 if available." 
- }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPUs to use during the cross validation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Sets the verbosity amount." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. Will be cast to X's dtype if necessary." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values. 
Will be cast to X's dtype if necessary.\n\nReturns\n-------\nself : object\n returns an instance of self." - } - ], - "docstring": "Cross-validated Orthogonal Matching Pursuit model (OMP).\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncopy : bool, default=True\n Whether the design matrix X must be copied by the algorithm. A false\n value is only helpful if X is already Fortran-ordered, otherwise a\n copy is made anyway.\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nmax_iter : int, default=None\n Maximum numbers of iterations to perform, therefore maximum features\n to include. 10% of ``n_features`` but at least 5 if available.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. 
versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nAttributes\n----------\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the problem formulation).\n\nn_nonzero_coefs_ : int\n Estimated number of non-zero coefficients giving the best mean squared\n error over the cross-validation folds.\n\nn_iter_ : int or array-like\n Number of active features across every target for the model refit with\n the best hyperparameters got by cross-validating across all folds.\n\nExamples\n--------\n>>> from sklearn.linear_model import OrthogonalMatchingPursuitCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=100, n_informative=10,\n... noise=4, random_state=0)\n>>> reg = OrthogonalMatchingPursuitCV(cv=5).fit(X, y)\n>>> reg.score(X, y)\n0.9991...\n>>> reg.n_nonzero_coefs_\n10\n>>> reg.predict(X[:1,])\narray([-78.3854...])\n\nSee Also\n--------\northogonal_mp\northogonal_mp_gram\nlars_path\nLars\nLassoLars\nOrthogonalMatchingPursuit\nLarsCV\nLassoLarsCV\nsklearn.decomposition.sparse_encode" - } - ], - "functions": [ - { - "name": "_cholesky_omp", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input dictionary. Columns are assumed to have unit norm." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input targets." 
- }, - { - "name": "n_nonzero_coefs", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targeted number of non-zero elements." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targeted squared error, if not None overrides n_nonzero_coefs." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the design matrix X must be copied by the algorithm. A false value is only helpful if X is already Fortran-ordered, otherwise a copy is made anyway." - }, - { - "name": "return_path", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return every value of the nonzero coefficients along the forward path. Useful for cross-validation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Orthogonal Matching Pursuit step using the Cholesky decomposition.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Input dictionary. Columns are assumed to have unit norm.\n\ny : ndarray of shape (n_samples,)\n Input targets.\n\nn_nonzero_coefs : int\n Targeted number of non-zero elements.\n\ntol : float, default=None\n Targeted squared error, if not None overrides n_nonzero_coefs.\n\ncopy_X : bool, default=True\n Whether the design matrix X must be copied by the algorithm. A false\n value is only helpful if X is already Fortran-ordered, otherwise a\n copy is made anyway.\n\nreturn_path : bool, default=False\n Whether to return every value of the nonzero coefficients along the\n forward path. 
Useful for cross-validation.\n\nReturns\n-------\ngamma : ndarray of shape (n_nonzero_coefs,)\n Non-zero elements of the solution.\n\nidx : ndarray of shape (n_nonzero_coefs,)\n Indices of the positions of the elements in gamma within the solution\n vector.\n\ncoef : ndarray of shape (n_features, n_nonzero_coefs)\n The first k values of column k correspond to the coefficient value\n for the active features at that step. The lower left triangle contains\n garbage. Only returned if ``return_path=True``.\n\nn_active : int\n Number of active features at convergence." - }, - { - "name": "_gram_omp", - "decorators": [], - "parameters": [ - { - "name": "Gram", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gram matrix of the input data matrix." - }, - { - "name": "Xy", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input targets." - }, - { - "name": "n_nonzero_coefs", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targeted number of non-zero elements." - }, - { - "name": "tol_0", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Squared norm of y, required if tol is not None." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targeted squared error, if not None overrides n_nonzero_coefs." - }, - { - "name": "copy_Gram", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the gram matrix must be copied by the algorithm. A false value is only helpful if it is already Fortran-ordered, otherwise a copy is made anyway." 
- }, - { - "name": "copy_Xy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the covariance vector Xy must be copied by the algorithm. If False, it may be overwritten." - }, - { - "name": "return_path", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return every value of the nonzero coefficients along the forward path. Useful for cross-validation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Orthogonal Matching Pursuit step on a precomputed Gram matrix.\n\nThis function uses the Cholesky decomposition method.\n\nParameters\n----------\nGram : ndarray of shape (n_features, n_features)\n Gram matrix of the input data matrix.\n\nXy : ndarray of shape (n_features,)\n Input targets.\n\nn_nonzero_coefs : int\n Targeted number of non-zero elements.\n\ntol_0 : float, default=None\n Squared norm of y, required if tol is not None.\n\ntol : float, default=None\n Targeted squared error, if not None overrides n_nonzero_coefs.\n\ncopy_Gram : bool, default=True\n Whether the gram matrix must be copied by the algorithm. A false\n value is only helpful if it is already Fortran-ordered, otherwise a\n copy is made anyway.\n\ncopy_Xy : bool, default=True\n Whether the covariance vector Xy must be copied by the algorithm.\n If False, it may be overwritten.\n\nreturn_path : bool, default=False\n Whether to return every value of the nonzero coefficients along the\n forward path. Useful for cross-validation.\n\nReturns\n-------\ngamma : ndarray of shape (n_nonzero_coefs,)\n Non-zero elements of the solution.\n\nidx : ndarray of shape (n_nonzero_coefs,)\n Indices of the positions of the elements in gamma within the solution\n vector.\n\ncoefs : ndarray of shape (n_features, n_nonzero_coefs)\n The first k values of column k correspond to the coefficient value\n for the active features at that step. 
The lower left triangle contains\n garbage. Only returned if ``return_path=True``.\n\nn_active : int\n Number of active features at convergence." - }, - { - "name": "orthogonal_mp", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. Columns are assumed to have unit norm." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input targets." - }, - { - "name": "n_nonzero_coefs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Desired number of non-zero entries in the solution. If None (by default) this value is set to 10% of n_features." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum norm of the residual. If not None, overrides n_nonzero_coefs." - }, - { - "name": "precompute", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to perform precomputations. Improves performance when n_targets or n_samples is very large." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the design matrix X must be copied by the algorithm. A false value is only helpful if X is already Fortran-ordered, otherwise a copy is made anyway." - }, - { - "name": "return_path", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return every value of the nonzero coefficients along the forward path. Useful for cross-validation." 
- }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Orthogonal Matching Pursuit (OMP).\n\nSolves n_targets Orthogonal Matching Pursuit problems.\nAn instance of the problem has the form:\n\nWhen parametrized by the number of non-zero coefficients using\n`n_nonzero_coefs`:\nargmin ||y - X\\gamma||^2 subject to ||\\gamma||_0 <= n_{nonzero coefs}\n\nWhen parametrized by error using the parameter `tol`:\nargmin ||\\gamma||_0 subject to ||y - X\\gamma||^2 <= tol\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Input data. Columns are assumed to have unit norm.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Input targets.\n\nn_nonzero_coefs : int, default=None\n Desired number of non-zero entries in the solution. If None (by\n default) this value is set to 10% of n_features.\n\ntol : float, default=None\n Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\nprecompute : 'auto' or bool, default=False\n Whether to perform precomputations. Improves performance when n_targets\n or n_samples is very large.\n\ncopy_X : bool, default=True\n Whether the design matrix X must be copied by the algorithm. A false\n value is only helpful if X is already Fortran-ordered, otherwise a\n copy is made anyway.\n\nreturn_path : bool, default=False\n Whether to return every value of the nonzero coefficients along the\n forward path. Useful for cross-validation.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\ncoef : ndarray of shape (n_features,) or (n_features, n_targets)\n Coefficients of the OMP solution. If `return_path=True`, this contains\n the whole coefficient path. 
In this case its shape is\n (n_features, n_features) or (n_features, n_targets, n_features) and\n iterating over the last axis yields coefficients in increasing order\n of active features.\n\nn_iters : array-like or int\n Number of active features across every target. Returned only if\n `return_n_iter` is set to True.\n\nSee Also\n--------\nOrthogonalMatchingPursuit\northogonal_mp_gram\nlars_path\nsklearn.decomposition.sparse_encode\n\nNotes\n-----\nOrthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,\nMatching pursuits with time-frequency dictionaries, IEEE Transactions on\nSignal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.\n(http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)\n\nThis implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,\nM., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\nMatching Pursuit Technical Report - CS Technion, April 2008.\nhttps://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf" - }, - { - "name": "orthogonal_mp_gram", - "decorators": [], - "parameters": [ - { - "name": "Gram", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gram matrix of the input data: X.T * X." - }, - { - "name": "Xy", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input targets multiplied by X: X.T * y." - }, - { - "name": "n_nonzero_coefs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Desired number of non-zero entries in the solution. If None (by default) this value is set to 10% of n_features." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum norm of the residual. If not None, overrides n_nonzero_coefs." 
- }, - { - "name": "norms_squared", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Squared L2 norms of the lines of y. Required if tol is not None." - }, - { - "name": "copy_Gram", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the gram matrix must be copied by the algorithm. A false value is only helpful if it is already Fortran-ordered, otherwise a copy is made anyway." - }, - { - "name": "copy_Xy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the covariance vector Xy must be copied by the algorithm. If False, it may be overwritten." - }, - { - "name": "return_path", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return every value of the nonzero coefficients along the forward path. Useful for cross-validation." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Gram Orthogonal Matching Pursuit (OMP).\n\nSolves n_targets Orthogonal Matching Pursuit problems using only\nthe Gram matrix X.T * X and the product X.T * y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nGram : ndarray of shape (n_features, n_features)\n Gram matrix of the input data: X.T * X.\n\nXy : ndarray of shape (n_features,) or (n_features, n_targets)\n Input targets multiplied by X: X.T * y.\n\nn_nonzero_coefs : int, default=None\n Desired number of non-zero entries in the solution. If None (by\n default) this value is set to 10% of n_features.\n\ntol : float, default=None\n Maximum norm of the residual. 
If not None, overrides n_nonzero_coefs.\n\nnorms_squared : array-like of shape (n_targets,), default=None\n Squared L2 norms of the lines of y. Required if tol is not None.\n\ncopy_Gram : bool, default=True\n Whether the gram matrix must be copied by the algorithm. A false\n value is only helpful if it is already Fortran-ordered, otherwise a\n copy is made anyway.\n\ncopy_Xy : bool, default=True\n Whether the covariance vector Xy must be copied by the algorithm.\n If False, it may be overwritten.\n\nreturn_path : bool, default=False\n Whether to return every value of the nonzero coefficients along the\n forward path. Useful for cross-validation.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\ncoef : ndarray of shape (n_features,) or (n_features, n_targets)\n Coefficients of the OMP solution. If `return_path=True`, this contains\n the whole coefficient path. In this case its shape is\n (n_features, n_features) or (n_features, n_targets, n_features) and\n iterating over the last axis yields coefficients in increasing order\n of active features.\n\nn_iters : array-like or int\n Number of active features across every target. Returned only if\n `return_n_iter` is set to True.\n\nSee Also\n--------\nOrthogonalMatchingPursuit\northogonal_mp\nlars_path\nsklearn.decomposition.sparse_encode\n\nNotes\n-----\nOrthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,\nMatching pursuits with time-frequency dictionaries, IEEE Transactions on\nSignal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.\n(http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)\n\nThis implementation is based on Rubinstein, R., Zibulevsky, M. 
and Elad,\nM., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\nMatching Pursuit Technical Report - CS Technion, April 2008.\nhttps://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf" - }, - { - "name": "_omp_path_residues", - "decorators": [], - "parameters": [ - { - "name": "X_train", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to fit the LARS on." - }, - { - "name": "y_train", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to fit LARS on." - }, - { - "name": "X_test", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to compute the residues on." - }, - { - "name": "y_test", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to compute the residues on." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether X_train, X_test, y_train and y_test should be copied. If False, they may be overwritten." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. 
If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum numbers of iterations to perform, therefore maximum features to include. 100 by default." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the residues on left-out data for a full LARS path.\n\nParameters\n----------\nX_train : ndarray of shape (n_samples, n_features)\n The data to fit the LARS on.\n\ny_train : ndarray of shape (n_samples)\n The target variable to fit LARS on.\n\nX_test : ndarray of shape (n_samples, n_features)\n The data to compute the residues on.\n\ny_test : ndarray of shape (n_samples)\n The target variable to compute the residues on.\n\ncopy : bool, default=True\n Whether X_train, X_test, y_train and y_test should be copied. If\n False, they may be overwritten.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nmax_iter : int, default=100\n Maximum numbers of iterations to perform, therefore maximum features\n to include. 100 by default.\n\nReturns\n-------\nresidues : ndarray of shape (n_samples, max_features)\n Residues of the prediction on the test data." 
- } - ] - }, - { - "name": "sklearn.linear_model._passive_aggressive", - "imports": [ - "from utils.validation import _deprecate_positional_args", - "from _stochastic_gradient import BaseSGDClassifier", - "from _stochastic_gradient import BaseSGDRegressor", - "from _stochastic_gradient import DEFAULT_EPSILON" - ], - "classes": [ - { - "name": "PassiveAggressiveClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "C", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Maximum step size (regularization). Defaults to 1.0." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the intercept should be estimated or not. If False, the data is assumed to be already centered." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the :meth:`partial_fit` method. .. versionadded:: 0.19" - }, - { - "name": "tol", - "type": "Optional[float]", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "The stopping criterion. If it is not None, the iterations will stop when (loss > previous_loss - tol). .. versionadded:: 0.19" - }, - { - "name": "early_stopping", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use early stopping to terminate training when validation. score is not improving. 
If set to True, it will automatically set aside a stratified fraction of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs. .. versionadded:: 0.20" - }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True. .. versionadded:: 0.20" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations with no improvement to wait before early stopping. .. versionadded:: 0.20" - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not the training data should be shuffled after each epoch." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level" - }, - { - "name": "loss", - "type": "str", - "hasDefault": true, - "default": "\"hinge\"", - "limitation": null, - "ignored": false, - "docstring": "The loss function to be used: hinge: equivalent to PA-I in the reference paper. squared_hinge: equivalent to PA-II in the reference paper." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of CPUs to use to do the OVA (One Versus All, for multi-class problems) computation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to shuffle the training data, when ``shuffle`` is set to ``True``. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `. Repeatedly calling fit or partial_fit when warm_start is True can result in a different solution than when calling fit a single time because of the way the data is shuffled." - }, - { - "name": "class_weight", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Preset for the class_weight fit parameter. Weights associated with classes. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` .. versionadded:: 0.17 parameter *class_weight* to automatically weight samples." - }, - { - "name": "average", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, computes the averaged SGD weights and stores the result in the ``coef_`` attribute. If set to an int greater than 1, averaging will begin once the total number of samples seen reaches average. So average=10 will begin averaging after seeing 10 samples. .. 
versionadded:: 0.19 parameter *average* to use weights averaging in SGD" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Subset of the training data" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Subset of the target values" - }, - { - "name": "classes", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Classes across all calls to partial_fit. Can be obtained by via `np.unique(y_all)`, where y_all is the target vector of the entire dataset. This argument is required for the first call to partial_fit and can be omitted in the subsequent calls. Note that y doesn't need to contain all labels in `classes`." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit linear model with Passive Aggressive algorithm.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Subset of the training data\n\ny : numpy array of shape [n_samples]\n Subset of the target values\n\nclasses : array, shape = [n_classes]\n Classes across all calls to partial_fit.\n Can be obtained by via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\nReturns\n-------\nself : returns an instance of self." 
- }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "coef_init", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial coefficients to warm-start the optimization." - }, - { - "name": "intercept_init", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial intercept to warm-start the optimization." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit linear model with Passive Aggressive algorithm.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data\n\ny : numpy array of shape [n_samples]\n Target values\n\ncoef_init : array, shape = [n_classes,n_features]\n The initial coefficients to warm-start the optimization.\n\nintercept_init : array, shape = [n_classes]\n The initial intercept to warm-start the optimization.\n\nReturns\n-------\nself : returns an instance of self." - } - ], - "docstring": "Passive Aggressive Classifier\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nC : float, default=1.0\n Maximum step size (regularization). Defaults to 1.0.\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If False, the\n data is assumed to be already centered.\n\nmax_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\ntol : float or None, default=1e-3\n The stopping criterion. 
If it is not None, the iterations will stop\n when (loss > previous_loss - tol).\n\n .. versionadded:: 0.19\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation.\n score is not improving. If set to True, it will automatically set aside\n a stratified fraction of training data as validation and terminate\n training when validation score is not improving by at least tol for\n n_iter_no_change consecutive epochs.\n\n .. versionadded:: 0.20\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True.\n\n .. versionadded:: 0.20\n\nn_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before early stopping.\n\n .. versionadded:: 0.20\n\nshuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\nverbose : integer, default=0\n The verbosity level\n\nloss : string, default=\"hinge\"\n The loss function to be used:\n hinge: equivalent to PA-I in the reference paper.\n squared_hinge: equivalent to PA-II in the reference paper.\n\nn_jobs : int or None, default=None\n The number of CPUs to use to do the OVA (One Versus All, for\n multi-class problems) computation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n Used to shuffle the training data, when ``shuffle`` is set to\n ``True``. 
Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\n Repeatedly calling fit or partial_fit when warm_start is True can\n result in a different solution than when calling fit a single time\n because of the way the data is shuffled.\n\nclass_weight : dict, {class_label: weight} or \"balanced\" or None, default=None\n Preset for the class_weight fit parameter.\n\n Weights associated with classes. If not given, all classes\n are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n .. versionadded:: 0.17\n parameter *class_weight* to automatically weight samples.\n\naverage : bool or int, default=False\n When set to True, computes the averaged SGD weights and stores the\n result in the ``coef_`` attribute. If set to an int greater than 1,\n averaging will begin once the total number of samples seen reaches\n average. So average=10 will begin averaging after seeing 10 samples.\n\n .. 
versionadded:: 0.19\n parameter *average* to use weights averaging in SGD\n\nAttributes\n----------\ncoef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes, n_features]\n Weights assigned to the features.\n\nintercept_ : array, shape = [1] if n_classes == 2 else [n_classes]\n Constants in decision function.\n\nn_iter_ : int\n The actual number of iterations to reach the stopping criterion.\n For multiclass fits, it is the maximum over every binary fit.\n\nclasses_ : array of shape (n_classes,)\n The unique classes labels.\n\nt_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\nloss_function_ : callable\n Loss function used by the algorithm.\n\nExamples\n--------\n>>> from sklearn.linear_model import PassiveAggressiveClassifier\n>>> from sklearn.datasets import make_classification\n\n>>> X, y = make_classification(n_features=4, random_state=0)\n>>> clf = PassiveAggressiveClassifier(max_iter=1000, random_state=0,\n... tol=1e-3)\n>>> clf.fit(X, y)\nPassiveAggressiveClassifier(random_state=0)\n>>> print(clf.coef_)\n[[0.26642044 0.45070924 0.67251877 0.64185414]]\n>>> print(clf.intercept_)\n[1.84127814]\n>>> print(clf.predict([[0, 0, 0, 0]]))\n[1]\n\nSee Also\n--------\nSGDClassifier\nPerceptron\n\nReferences\n----------\nOnline Passive-Aggressive Algorithms\n\nK. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR (2006)" - }, - { - "name": "PassiveAggressiveRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "C", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Maximum step size (regularization). Defaults to 1.0." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the intercept should be estimated or not. 
If False, the data is assumed to be already centered. Defaults to True." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the :meth:`partial_fit` method. .. versionadded:: 0.19" - }, - { - "name": "tol", - "type": "Optional[float]", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "The stopping criterion. If it is not None, the iterations will stop when (loss > previous_loss - tol). .. versionadded:: 0.19" - }, - { - "name": "early_stopping", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use early stopping to terminate training when validation. score is not improving. If set to True, it will automatically set aside a fraction of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs. .. versionadded:: 0.20" - }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True. .. versionadded:: 0.20" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations with no improvement to wait before early stopping. .. versionadded:: 0.20" - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not the training data should be shuffled after each epoch." 
- }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level" - }, - { - "name": "loss", - "type": "str", - "hasDefault": true, - "default": "\"epsilon_insensitive\"", - "limitation": null, - "ignored": false, - "docstring": "The loss function to be used: epsilon_insensitive: equivalent to PA-I in the reference paper. squared_epsilon_insensitive: equivalent to PA-II in the reference paper." - }, - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "DEFAULT_EPSILON", - "limitation": null, - "ignored": false, - "docstring": "If the difference between the current prediction and the correct label is below this threshold, the model is not updated." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to shuffle the training data, when ``shuffle`` is set to ``True``. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `. Repeatedly calling fit or partial_fit when warm_start is True can result in a different solution than when calling fit a single time because of the way the data is shuffled." - }, - { - "name": "average", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, computes the averaged SGD weights and stores the result in the ``coef_`` attribute. If set to an int greater than 1, averaging will begin once the total number of samples seen reaches average. 
So average=10 will begin averaging after seeing 10 samples. .. versionadded:: 0.19 parameter *average* to use weights averaging in SGD" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Subset of training data" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Subset of target values" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit linear model with Passive Aggressive algorithm.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Subset of training data\n\ny : numpy array of shape [n_samples]\n Subset of target values\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "coef_init", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial coefficients to warm-start the optimization." - }, - { - "name": "intercept_init", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial intercept to warm-start the optimization." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit linear model with Passive Aggressive algorithm.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data\n\ny : numpy array of shape [n_samples]\n Target values\n\ncoef_init : array, shape = [n_features]\n The initial coefficients to warm-start the optimization.\n\nintercept_init : array, shape = [1]\n The initial intercept to warm-start the optimization.\n\nReturns\n-------\nself : returns an instance of self." - } - ], - "docstring": "Passive Aggressive Regressor\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nC : float, default=1.0\n Maximum step size (regularization). Defaults to 1.0.\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If False, the\n data is assumed to be already centered. Defaults to True.\n\nmax_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\ntol : float or None, default=1e-3\n The stopping criterion. If it is not None, the iterations will stop\n when (loss > previous_loss - tol).\n\n .. versionadded:: 0.19\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation.\n score is not improving. If set to True, it will automatically set aside\n a fraction of training data as validation and terminate\n training when validation score is not improving by at least tol for\n n_iter_no_change consecutive epochs.\n\n .. versionadded:: 0.20\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True.\n\n .. 
versionadded:: 0.20\n\nn_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before early stopping.\n\n .. versionadded:: 0.20\n\nshuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\nverbose : integer, default=0\n The verbosity level\n\nloss : string, default=\"epsilon_insensitive\"\n The loss function to be used:\n epsilon_insensitive: equivalent to PA-I in the reference paper.\n squared_epsilon_insensitive: equivalent to PA-II in the reference\n paper.\n\nepsilon : float, default=DEFAULT_EPSILON\n If the difference between the current prediction and the correct label\n is below this threshold, the model is not updated.\n\nrandom_state : int, RandomState instance, default=None\n Used to shuffle the training data, when ``shuffle`` is set to\n ``True``. Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\n Repeatedly calling fit or partial_fit when warm_start is True can\n result in a different solution than when calling fit a single time\n because of the way the data is shuffled.\n\naverage : bool or int, default=False\n When set to True, computes the averaged SGD weights and stores the\n result in the ``coef_`` attribute. If set to an int greater than 1,\n averaging will begin once the total number of samples seen reaches\n average. So average=10 will begin averaging after seeing 10 samples.\n\n .. 
versionadded:: 0.19\n parameter *average* to use weights averaging in SGD\n\nAttributes\n----------\ncoef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes, n_features]\n Weights assigned to the features.\n\nintercept_ : array, shape = [1] if n_classes == 2 else [n_classes]\n Constants in decision function.\n\nn_iter_ : int\n The actual number of iterations to reach the stopping criterion.\n\nt_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\nExamples\n--------\n>>> from sklearn.linear_model import PassiveAggressiveRegressor\n>>> from sklearn.datasets import make_regression\n\n>>> X, y = make_regression(n_features=4, random_state=0)\n>>> regr = PassiveAggressiveRegressor(max_iter=100, random_state=0,\n... tol=1e-3)\n>>> regr.fit(X, y)\nPassiveAggressiveRegressor(max_iter=100, random_state=0)\n>>> print(regr.coef_)\n[20.48736655 34.18818427 67.59122734 87.94731329]\n>>> print(regr.intercept_)\n[-0.02306214]\n>>> print(regr.predict([[0, 0, 0, 0]]))\n[-0.02306214]\n\nSee Also\n--------\nSGDRegressor\n\nReferences\n----------\nOnline Passive-Aggressive Algorithms\n\nK. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR (2006)" - } - ], - "functions": [] - }, - { - "name": "sklearn.linear_model._perceptron", - "imports": [ - "from utils.validation import _deprecate_positional_args", - "from _stochastic_gradient import BaseSGDClassifier" - ], - "classes": [ - { - "name": "Perceptron", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "penalty", - "type": "Literal['l2', 'l1', 'elasticnet']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The penalty (aka regularization term) to be used." 
- }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the regularization term if regularization is used." - }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The Elastic Net mixing parameter, with `0 <= l1_ratio <= 1`. `l1_ratio=0` corresponds to L2 penalty, `l1_ratio=1` to L1. Only used if `penalty='elasticnet'`. .. versionadded:: 0.24" - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the intercept should be estimated or not. If False, the data is assumed to be already centered." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the :meth:`partial_fit` method. .. versionadded:: 0.19" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "The stopping criterion. If it is not None, the iterations will stop when (loss > previous_loss - tol). .. versionadded:: 0.19" - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not the training data should be shuffled after each epoch." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level" - }, - { - "name": "eta0", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant by which the updates are multiplied." 
- }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of CPUs to use to do the OVA (One Versus All, for multi-class problems) computation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to shuffle the training data, when ``shuffle`` is set to ``True``. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "early_stopping", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use early stopping to terminate training when validation. score is not improving. If set to True, it will automatically set aside a stratified fraction of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs. .. versionadded:: 0.20" - }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True. .. versionadded:: 0.20" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations with no improvement to wait before early stopping. .. versionadded:: 0.20" - }, - { - "name": "class_weight", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Preset for the class_weight fit parameter. 
Weights associated with classes. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``" - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Perceptron\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\npenalty : {'l2','l1','elasticnet'}, default=None\n The penalty (aka regularization term) to be used.\n\nalpha : float, default=0.0001\n Constant that multiplies the regularization term if regularization is\n used.\n\nl1_ratio : float, default=0.15\n The Elastic Net mixing parameter, with `0 <= l1_ratio <= 1`.\n `l1_ratio=0` corresponds to L2 penalty, `l1_ratio=1` to L1.\n Only used if `penalty='elasticnet'`.\n\n .. versionadded:: 0.24\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If False, the\n data is assumed to be already centered.\n\nmax_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\ntol : float, default=1e-3\n The stopping criterion. If it is not None, the iterations will stop\n when (loss > previous_loss - tol).\n\n .. 
versionadded:: 0.19\n\nshuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n The verbosity level\n\neta0 : double, default=1\n Constant by which the updates are multiplied.\n\nn_jobs : int, default=None\n The number of CPUs to use to do the OVA (One Versus All, for\n multi-class problems) computation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n Used to shuffle the training data, when ``shuffle`` is set to\n ``True``. Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation.\n score is not improving. If set to True, it will automatically set aside\n a stratified fraction of training data as validation and terminate\n training when validation score is not improving by at least tol for\n n_iter_no_change consecutive epochs.\n\n .. versionadded:: 0.20\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True.\n\n .. versionadded:: 0.20\n\nn_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before early stopping.\n\n .. versionadded:: 0.20\n\nclass_weight : dict, {class_label: weight} or \"balanced\", default=None\n Preset for the class_weight fit parameter.\n\n Weights associated with classes. 
If not given, all classes\n are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution. See\n :term:`the Glossary `.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n The unique classes labels.\n\ncoef_ : ndarray of shape (1, n_features) if n_classes == 2 else (n_classes, n_features)\n Weights assigned to the features.\n\nintercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n Constants in decision function.\n\nloss_function_ : concrete\u00a0LossFunction\n The function that determines the loss, or difference between the\n output of the algorithm and the target values.\n\nn_iter_ : int\n The actual number of iterations to reach the stopping criterion.\n For multiclass fits, it is the maximum over every binary fit.\n\nt_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\nNotes\n-----\n\n``Perceptron`` is a classification algorithm which shares the same\nunderlying implementation with ``SGDClassifier``. In fact,\n``Perceptron()`` is equivalent to `SGDClassifier(loss=\"perceptron\",\neta0=1, learning_rate=\"constant\", penalty=None)`.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.linear_model import Perceptron\n>>> X, y = load_digits(return_X_y=True)\n>>> clf = Perceptron(tol=1e-3, random_state=0)\n>>> clf.fit(X, y)\nPerceptron()\n>>> clf.score(X, y)\n0.939...\n\nSee Also\n--------\nSGDClassifier\n\nReferences\n----------\n\nhttps://en.wikipedia.org/wiki/Perceptron and references therein." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.linear_model._ransac", - "imports": [ - "import numpy as np", - "import warnings", - "from base import BaseEstimator", - "from base import MetaEstimatorMixin", - "from base import RegressorMixin", - "from base import clone", - "from base import MultiOutputMixin", - "from utils import check_random_state", - "from utils import check_consistent_length", - "from utils.random import sample_without_replacement", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from _base import LinearRegression", - "from utils.validation import has_fit_parameter", - "from exceptions import ConvergenceWarning" - ], - "classes": [ - { - "name": "RANSACRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Base estimator object which implements the following methods: * `fit(X, y)`: Fit model to given training data and target values. * `score(X, y)`: Returns the mean accuracy on the given test data, which is used for the stop criterion defined by `stop_score`. Additionally, the score is used to decide which of two equally large consensus sets is chosen as the better one. * `predict(X)`: Returns predicted values using the linear model, which is used to compute residual error using loss function. If `base_estimator` is None, then :class:`~sklearn.linear_model.LinearRegression` is used for target values of dtype float. Note that the current implementation only supports regression estimators." 
- }, - { - "name": "min_samples", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Minimum number of samples chosen randomly from original data. Treated as an absolute number of samples for `min_samples >= 1`, treated as a relative number `ceil(min_samples * X.shape[0]`) for `min_samples < 1`. This is typically chosen as the minimal number of samples necessary to estimate the given `base_estimator`. By default a ``sklearn.linear_model.LinearRegression()`` estimator is assumed and `min_samples` is chosen as ``X.shape[1] + 1``." - }, - { - "name": "residual_threshold", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum residual for a data sample to be classified as an inlier. By default the threshold is chosen as the MAD (median absolute deviation) of the target values `y`." - }, - { - "name": "is_data_valid", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This function is called with the randomly selected data before the model is fitted to it: `is_data_valid(X, y)`. If its return value is False the current randomly chosen sub-sample is skipped." - }, - { - "name": "is_model_valid", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This function is called with the estimated model and the randomly selected data: `is_model_valid(model, X, y)`. If its return value is False the current randomly chosen sub-sample is skipped. Rejecting samples with this function is computationally costlier than with `is_data_valid`. `is_model_valid` should therefore only be used if the estimated model is needed for making the rejection decision." 
- }, - { - "name": "max_trials", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for random sample selection." - }, - { - "name": "max_skips", - "type": "int", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations that can be skipped due to finding zero inliers or invalid data defined by ``is_data_valid`` or invalid models defined by ``is_model_valid``. .. versionadded:: 0.19" - }, - { - "name": "stop_n_inliers", - "type": "int", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Stop iteration if at least this number of inliers are found." - }, - { - "name": "stop_score", - "type": "float", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Stop iteration if score is greater equal than this threshold." - }, - { - "name": "stop_probability", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "RANSAC iteration stops if at least one outlier-free set of the training data is sampled in RANSAC. This requires to generate at least N samples (iterations):: N >= log(1 - probability) / log(1 - e**m) where the probability (confidence) is typically set to high value such as 0.99 (the default) and e is the current fraction of inliers w.r.t. the total number of samples." - }, - { - "name": "loss", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'absolute_loss'", - "limitation": null, - "ignored": false, - "docstring": "String inputs, \"absolute_loss\" and \"squared_loss\" are supported which find the absolute loss and squared loss per sample respectively. 
If ``loss`` is a callable, then it should be a function that takes two arrays as inputs, the true and predicted value and returns a 1-D array with the i-th value of the array corresponding to the loss on ``X[i]``. If the loss on a sample is greater than the ``residual_threshold``, then this sample is classified as an outlier. .. versionadded:: 0.18" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The generator used to initialize the centers. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample raises error if sample_weight is passed and base_estimator fit method does not support it. .. versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit estimator using RANSAC algorithm.\n\nParameters\n----------\nX : array-like or sparse matrix, shape [n_samples, n_features]\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample\n raises error if sample_weight is passed and base_estimator\n fit method does not support it.\n\n .. 
versionadded:: 0.18\n\nRaises\n------\nValueError\n If no valid consensus set could be found. This occurs if\n `is_data_valid` and `is_model_valid` return False for all\n `max_trials` randomly chosen sub-samples." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the estimated model.\n\nThis is a wrapper for `estimator_.predict(X)`.\n\nParameters\n----------\nX : numpy array of shape [n_samples, n_features]\n\nReturns\n-------\ny : array, shape = [n_samples] or [n_samples, n_targets]\n Returns predicted values." - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the score of the prediction.\n\nThis is a wrapper for `estimator_.score(X, y)`.\n\nParameters\n----------\nX : numpy array or sparse matrix of shape [n_samples, n_features]\n Training data.\n\ny : array, shape = [n_samples] or [n_samples, n_targets]\n Target values.\n\nReturns\n-------\nz : float\n Score of the prediction." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "RANSAC (RANdom SAmple Consensus) algorithm.\n\nRANSAC is an iterative algorithm for the robust estimation of parameters\nfrom a subset of inliers from the complete data set.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nbase_estimator : object, default=None\n Base estimator object which implements the following methods:\n\n * `fit(X, y)`: Fit model to given training data and target values.\n * `score(X, y)`: Returns the mean accuracy on the given test data,\n which is used for the stop criterion defined by `stop_score`.\n Additionally, the score is used to decide which of two equally\n large consensus sets is chosen as the better one.\n * `predict(X)`: Returns predicted values using the linear model,\n which is used to compute residual error using loss function.\n\n If `base_estimator` is None, then\n :class:`~sklearn.linear_model.LinearRegression` is used for\n target values of dtype float.\n\n Note that the current implementation only supports regression\n estimators.\n\nmin_samples : int (>= 1) or float ([0, 1]), default=None\n Minimum number of samples chosen randomly from original data. Treated\n as an absolute number of samples for `min_samples >= 1`, treated as a\n relative number `ceil(min_samples * X.shape[0]`) for\n `min_samples < 1`. This is typically chosen as the minimal number of\n samples necessary to estimate the given `base_estimator`. 
By default a\n ``sklearn.linear_model.LinearRegression()`` estimator is assumed and\n `min_samples` is chosen as ``X.shape[1] + 1``.\n\nresidual_threshold : float, default=None\n Maximum residual for a data sample to be classified as an inlier.\n By default the threshold is chosen as the MAD (median absolute\n deviation) of the target values `y`.\n\nis_data_valid : callable, default=None\n This function is called with the randomly selected data before the\n model is fitted to it: `is_data_valid(X, y)`. If its return value is\n False the current randomly chosen sub-sample is skipped.\n\nis_model_valid : callable, default=None\n This function is called with the estimated model and the randomly\n selected data: `is_model_valid(model, X, y)`. If its return value is\n False the current randomly chosen sub-sample is skipped.\n Rejecting samples with this function is computationally costlier than\n with `is_data_valid`. `is_model_valid` should therefore only be used if\n the estimated model is needed for making the rejection decision.\n\nmax_trials : int, default=100\n Maximum number of iterations for random sample selection.\n\nmax_skips : int, default=np.inf\n Maximum number of iterations that can be skipped due to finding zero\n inliers or invalid data defined by ``is_data_valid`` or invalid models\n defined by ``is_model_valid``.\n\n .. versionadded:: 0.19\n\nstop_n_inliers : int, default=np.inf\n Stop iteration if at least this number of inliers are found.\n\nstop_score : float, default=np.inf\n Stop iteration if score is greater equal than this threshold.\n\nstop_probability : float in range [0, 1], default=0.99\n RANSAC iteration stops if at least one outlier-free set of the training\n data is sampled in RANSAC. 
This requires to generate at least N\n samples (iterations)::\n\n N >= log(1 - probability) / log(1 - e**m)\n\n where the probability (confidence) is typically set to high value such\n as 0.99 (the default) and e is the current fraction of inliers w.r.t.\n the total number of samples.\n\nloss : string, callable, default='absolute_loss'\n String inputs, \"absolute_loss\" and \"squared_loss\" are supported which\n find the absolute loss and squared loss per sample\n respectively.\n\n If ``loss`` is a callable, then it should be a function that takes\n two arrays as inputs, the true and predicted value and returns a 1-D\n array with the i-th value of the array corresponding to the loss\n on ``X[i]``.\n\n If the loss on a sample is greater than the ``residual_threshold``,\n then this sample is classified as an outlier.\n\n .. versionadded:: 0.18\n\nrandom_state : int, RandomState instance, default=None\n The generator used to initialize the centers.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nestimator_ : object\n Best fitted model (copy of the `base_estimator` object).\n\nn_trials_ : int\n Number of random selection trials until one of the stop criteria is\n met. It is always ``<= max_trials``.\n\ninlier_mask_ : bool array of shape [n_samples]\n Boolean mask of inliers classified as ``True``.\n\nn_skips_no_inliers_ : int\n Number of iterations skipped due to finding zero inliers.\n\n .. versionadded:: 0.19\n\nn_skips_invalid_data_ : int\n Number of iterations skipped due to invalid data defined by\n ``is_data_valid``.\n\n .. versionadded:: 0.19\n\nn_skips_invalid_model_ : int\n Number of iterations skipped due to an invalid model defined by\n ``is_model_valid``.\n\n .. versionadded:: 0.19\n\nExamples\n--------\n>>> from sklearn.linear_model import RANSACRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(\n... 
n_samples=200, n_features=2, noise=4.0, random_state=0)\n>>> reg = RANSACRegressor(random_state=0).fit(X, y)\n>>> reg.score(X, y)\n0.9885...\n>>> reg.predict(X[:1,])\narray([-31.9417...])\n\nReferences\n----------\n.. [1] https://en.wikipedia.org/wiki/RANSAC\n.. [2] https://www.sri.com/sites/default/files/publications/ransac-publication.pdf\n.. [3] http://www.bmva.org/bmvc/2009/Papers/Paper355/Paper355.pdf" - } - ], - "functions": [ - { - "name": "_dynamic_max_trials", - "decorators": [], - "parameters": [ - { - "name": "n_inliers", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of inliers in the data." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Total number of samples in the data." - }, - { - "name": "min_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Minimum number of samples chosen randomly from original data." - }, - { - "name": "probability", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Probability (confidence) that one outlier-free sample is generated." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Determine number trials such that at least one outlier-free subset is\nsampled for the given inlier/outlier ratio.\n\nParameters\n----------\nn_inliers : int\n Number of inliers in the data.\n\nn_samples : int\n Total number of samples in the data.\n\nmin_samples : int\n Minimum number of samples chosen randomly from original data.\n\nprobability : float\n Probability (confidence) that one outlier-free sample is generated.\n\nReturns\n-------\ntrials : int\n Number of trials." 
- } - ] - }, - { - "name": "sklearn.linear_model._ridge", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "import warnings", - "import numpy as np", - "from scipy import linalg", - "from scipy import sparse", - "from scipy.sparse import linalg as sp_linalg", - "from _base import LinearClassifierMixin", - "from _base import LinearModel", - "from _base import _rescale_data", - "from _sag import sag_solver", - "from base import RegressorMixin", - "from base import MultiOutputMixin", - "from base import is_classifier", - "from utils.extmath import safe_sparse_dot", - "from utils.extmath import row_norms", - "from utils import check_array", - "from utils import check_consistent_length", - "from utils import compute_sample_weight", - "from utils import column_or_1d", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from preprocessing import LabelBinarizer", - "from model_selection import GridSearchCV", - "from metrics import check_scoring", - "from exceptions import ConvergenceWarning", - "from utils.sparsefuncs import mean_variance_axis" - ], - "classes": [ - { - "name": "_BaseRidge", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "Ridge", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Regularization strength; must be a positive float. 
Regularization improves the conditioning of the problem and reduces the variance of the estimates. Larger values specify stronger regularization. Alpha corresponds to ``1 / (2C)`` in other linear models such as :class:`~sklearn.linear_model.LogisticRegression` or :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are assumed to be specific to the targets. Hence they must correspond in number." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to fit the intercept for this model. If set to false, no intercept will be used in calculations (i.e. ``X`` and ``y`` are expected to be centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for conjugate gradient solver. For 'sparse_cg' and 'lsqr' solvers, the default value is determined by scipy.sparse.linalg. For 'sag' solver, the default value is 1000." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Precision of the solution." 
- }, - { - "name": "solver", - "type": "Literal['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Solver to use in the computational routines: - 'auto' chooses the solver automatically based on the type of data. - 'svd' uses a Singular Value Decomposition of X to compute the Ridge coefficients. More stable for singular matrices than 'cholesky'. - 'cholesky' uses the standard scipy.linalg.solve function to obtain a closed-form solution. - 'sparse_cg' uses the conjugate gradient solver as found in scipy.sparse.linalg.cg. As an iterative algorithm, this solver is more appropriate than 'cholesky' for large-scale data (possibility to set `tol` and `max_iter`). - 'lsqr' uses the dedicated regularized least-squares routine scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative procedure. - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses its improved, unbiased version named SAGA. Both methods also use an iterative procedure, and are often faster than other solvers when both n_samples and n_features are large. Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. You can preprocess the data with a scaler from sklearn.preprocessing. All last five solvers support both dense and sparse data. However, only 'sag' and 'sparse_cg' supports sparse input when `fit_intercept` is True. .. versionadded:: 0.17 Stochastic Average Gradient descent solver. .. versionadded:: 0.19 SAGA solver." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``solver`` == 'sag' or 'saga' to shuffle the data. See :term:`Glossary ` for details. .. versionadded:: 0.17 `random_state` to support Stochastic Average Gradient." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "sample_weight", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample. If given a float, every sample will have the same weight." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Ridge regression model.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\nReturns\n-------\nself : returns an instance of self." - } - ], - "docstring": "Linear least squares with l2 regularization.\n\nMinimizes the objective function::\n\n||y - Xw||^2_2 + alpha * ||w||^2_2\n\nThis model solves a regression model where the loss function is\nthe linear least squares function and regularization is given by\nthe l2-norm. Also known as Ridge Regression or Tikhonov regularization.\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape (n_samples, n_targets)).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : {float, ndarray of shape (n_targets,)}, default=1.0\n Regularization strength; must be a positive float. 
Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n assumed to be specific to the targets. Hence they must correspond in\n number.\n\nfit_intercept : bool, default=True\n Whether to fit the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. ``X`` and ``y`` are expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=None\n Maximum number of iterations for conjugate gradient solver.\n For 'sparse_cg' and 'lsqr' solvers, the default value is determined\n by scipy.sparse.linalg. For 'sag' solver, the default value is 1000.\n\ntol : float, default=1e-3\n Precision of the solution.\n\nsolver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, default='auto'\n Solver to use in the computational routines:\n\n - 'auto' chooses the solver automatically based on the type of data.\n\n - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. More stable for singular matrices than 'cholesky'.\n\n - 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution.\n\n - 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. 
As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n - 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n procedure.\n\n - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its improved, unbiased version named SAGA. Both methods also use an\n iterative procedure, and are often faster than other solvers when\n both n_samples and n_features are large. Note that 'sag' and\n 'saga' fast convergence is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\n All last five solvers support both dense and sparse data. However, only\n 'sag' and 'sparse_cg' supports sparse input when `fit_intercept` is\n True.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n See :term:`Glossary ` for details.\n\n .. versionadded:: 0.17\n `random_state` to support Stochastic Average Gradient.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Weight vector(s).\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\nn_iter_ : None or ndarray of shape (n_targets,)\n Actual number of iterations for each target. Available only for\n sag and lsqr solvers. Other solvers will return None.\n\n .. 
versionadded:: 0.17\n\nSee Also\n--------\nRidgeClassifier : Ridge classifier.\nRidgeCV : Ridge regression with built-in cross validation.\n:class:`~sklearn.kernel_ridge.KernelRidge` : Kernel ridge regression\n combines ridge regression with the kernel trick.\n\nExamples\n--------\n>>> from sklearn.linear_model import Ridge\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> clf = Ridge(alpha=1.0)\n>>> clf.fit(X, y)\nRidge()" - }, - { - "name": "RidgeClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Regularization strength; must be a positive float. Regularization improves the conditioning of the problem and reduces the variance of the estimates. Larger values specify stronger regularization. Alpha corresponds to ``1 / (2C)`` in other linear models such as :class:`~sklearn.linear_model.LogisticRegression` or :class:`~sklearn.svm.LinearSVC`." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (e.g. data is expected to be already centered)." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." 
- }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for conjugate gradient solver. The default value is determined by scipy.sparse.linalg." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Precision of the solution." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``." - }, - { - "name": "solver", - "type": "Literal['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Solver to use in the computational routines: - 'auto' chooses the solver automatically based on the type of data. - 'svd' uses a Singular Value Decomposition of X to compute the Ridge coefficients. More stable for singular matrices than 'cholesky'. - 'cholesky' uses the standard scipy.linalg.solve function to obtain a closed-form solution. - 'sparse_cg' uses the conjugate gradient solver as found in scipy.sparse.linalg.cg. As an iterative algorithm, this solver is more appropriate than 'cholesky' for large-scale data (possibility to set `tol` and `max_iter`). 
- 'lsqr' uses the dedicated regularized least-squares routine scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative procedure. - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses its unbiased and more flexible version named SAGA. Both methods use an iterative procedure, and are often faster than other solvers when both n_samples and n_features are large. Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. You can preprocess the data with a scaler from sklearn.preprocessing. .. versionadded:: 0.17 Stochastic Average Gradient descent solver. .. versionadded:: 0.19 SAGA solver." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``solver`` == 'sag' or 'saga' to shuffle the data. See :term:`Glossary ` for details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample. If given a float, every sample will have the same weight. .. versionadded:: 0.17 *sample_weight* support to Classifier." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Ridge classifier model.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Target values.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\n .. versionadded:: 0.17\n *sample_weight* support to Classifier.\n\nReturns\n-------\nself : object\n Instance of the estimator." - }, - { - "name": "classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Classifier using Ridge regression.\n\nThis classifier first converts the target values into ``{-1, 1}`` and\nthen treats the problem as a regression task (multi-output regression in\nthe multiclass case).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set to false, no\n intercept will be used in calculations (e.g. 
data is expected to be\n already centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=None\n Maximum number of iterations for conjugate gradient solver.\n The default value is determined by scipy.sparse.linalg.\n\ntol : float, default=1e-3\n Precision of the solution.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\nsolver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, default='auto'\n Solver to use in the computational routines:\n\n - 'auto' chooses the solver automatically based on the type of data.\n\n - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. More stable for singular matrices than 'cholesky'.\n\n - 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution.\n\n - 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n - 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. 
It is the fastest and uses an iterative\n procedure.\n\n - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its unbiased and more flexible version named SAGA. Both methods\n use an iterative procedure, and are often faster than other solvers\n when both n_samples and n_features are large. Note that 'sag' and\n 'saga' fast convergence is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n See :term:`Glossary ` for details.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n Coefficient of the features in the decision function.\n\n ``coef_`` is of shape (1, n_features) when the given problem is binary.\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\nn_iter_ : None or ndarray of shape (n_targets,)\n Actual number of iterations for each target. Available only for\n sag and lsqr solvers. Other solvers will return None.\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\nSee Also\n--------\nRidge : Ridge regression.\nRidgeClassifierCV : Ridge classifier with built-in cross validation.\n\nNotes\n-----\nFor multi-class classification, n_class classifiers are trained in\na one-versus-all approach. Concretely, this is implemented by taking\nadvantage of the multi-variate response support in Ridge.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.linear_model import RidgeClassifier\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> clf = RidgeClassifier().fit(X, y)\n>>> clf.score(X, y)\n0.9595..." 
- }, - { - "name": "_X_CenterStackOp", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_matvec", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_matmat", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_transpose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Behaves as centered and scaled X with an added intercept column.\n\nThis operator behaves as\nnp.hstack([X - sqrt_sw[:, None] * X_mean, sqrt_sw[:, None]])" - }, - { - "name": "_XT_CenterStackOp", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_matvec", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_matmat", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Behaves as transposed centered and scaled X with an intercept column.\n\nThis operator behaves as\nnp.hstack([X - sqrt_sw[:, None] * X_mean, sqrt_sw[:, None]]).T" - }, - { - "name": "_IdentityRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Fake regressor which will directly output the prediction." - }, - { - "name": "_IdentityClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Fake classifier which will directly output the prediction.\n\nWe inherit from LinearClassifierMixin to get the proper shape for the\noutput `y`." - }, - { - "name": "_RidgeGCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_decomp_diag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_diag_dot", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_compute_gram", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The preprocessed design matrix." 
- }, - { - "name": "sqrt_sw", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "square roots of sample weights" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the Gram matrix XX^T with possible centering.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The preprocessed design matrix.\n\nsqrt_sw : ndarray of shape (n_samples,)\n square roots of sample weights\n\nReturns\n-------\ngram : ndarray of shape (n_samples, n_samples)\n The Gram matrix.\nX_mean : ndarray of shape (n_feature,)\n The weighted mean of ``X`` for each feature.\n\nNotes\n-----\nWhen X is dense the centering has been done in preprocessing\nso the mean is 0 and we just compute XX^T.\n\nWhen X is sparse it has not been centered in preprocessing, but it has\nbeen scaled by sqrt(sample weights).\n\nWhen self.fit_intercept is False no centering is done.\n\nThe centered X is never actually computed because centering would break\nthe sparsity of X." - }, - { - "name": "_compute_covariance", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The preprocessed design matrix." 
- }, - { - "name": "sqrt_sw", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "square roots of sample weights" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes covariance matrix X^TX with possible centering.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n The preprocessed design matrix.\n\nsqrt_sw : ndarray of shape (n_samples,)\n square roots of sample weights\n\nReturns\n-------\ncovariance : ndarray of shape (n_features, n_features)\n The covariance matrix.\nX_mean : ndarray of shape (n_feature,)\n The weighted mean of ``X`` for each feature.\n\nNotes\n-----\nSince X is sparse it has not been centered in preprocessing, but it has\nbeen scaled by sqrt(sample weights).\n\nWhen self.fit_intercept is False no centering is done.\n\nThe centered X is never actually computed because centering would break\nthe sparsity of X." - }, - { - "name": "_sparse_multidot_diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "A", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "X_mean", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "sqrt_sw", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "square roots of sample weights" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the diagonal of (X - X_mean).dot(A).dot((X - X_mean).T)\nwithout explicitely centering X nor computing X.dot(A)\nwhen X is sparse.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n\nA : ndarray of shape 
(n_features, n_features)\n\nX_mean : ndarray of shape (n_features,)\n\nsqrt_sw : ndarray of shape (n_features,)\n square roots of sample weights\n\nReturns\n-------\ndiag : np.ndarray, shape (n_samples,)\n The computed diagonal." - }, - { - "name": "_eigen_decompose_gram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Eigendecomposition of X.X^T, used when n_samples <= n_features." - }, - { - "name": "_solve_eigen_gram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have a decomposition of X.X^T (n_samples <= n_features)." - }, - { - "name": "_eigen_decompose_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Eigendecomposition of X^T.X, used when n_samples > n_features\nand X is sparse." - }, - { - "name": "_solve_eigen_covariance_no_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have a decomposition of X^T.X\n(n_samples > n_features and X is sparse), and not fitting an intercept." - }, - { - "name": "_solve_eigen_covariance_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have a decomposition of X^T.X\n(n_samples > n_features and X is sparse),\nand we are fitting an intercept." - }, - { - "name": "_solve_eigen_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have a decomposition of X^T.X\n(n_samples > n_features and X is sparse)." 
- }, - { - "name": "_svd_decompose_design_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_solve_svd_design_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have an SVD decomposition of X\n(n_samples > n_features and X is dense)." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. Will be cast to float64 if necessary." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. Will be cast to float64 if necessary." - }, - { - "name": "sample_weight", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample. If given a float, every sample will have the same weight." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Ridge regression model with gcv.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data. Will be cast to float64 if necessary.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to float64 if necessary.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\nReturns\n-------\nself : object" - } - ], - "docstring": "Ridge regression with built-in Leave-one-out Cross-Validation.\n\nThis class is not intended to be used directly. 
Use RidgeCV instead.\n\nNotes\n-----\n\nWe want to solve (K + alpha*Id)c = y,\nwhere K = X X^T is the kernel matrix.\n\nLet G = (K + alpha*Id).\n\nDual solution: c = G^-1y\nPrimal solution: w = X^T c\n\nCompute eigendecomposition K = Q V Q^T.\nThen G^-1 = Q (V + alpha*Id)^-1 Q^T,\nwhere (V + alpha*Id) is diagonal.\nIt is thus inexpensive to inverse for many alphas.\n\nLet loov be the vector of prediction values for each example\nwhen the model was fitted with all examples but this example.\n\nloov = (KG^-1Y - diag(KG^-1)Y) / diag(I-KG^-1)\n\nLet looe be the vector of prediction errors for each example\nwhen the model was fitted with all examples but this example.\n\nlooe = y - loov = c / diag(G^-1)\n\nThe best score (negative mean squared error or user-provided scoring) is\nstored in the `best_score_` attribute, and the selected hyperparameter in\n`alpha_`.\n\nReferences\n----------\nhttp://cbcl.mit.edu/publications/ps/MIT-CSAIL-TR-2007-025.pdf\nhttps://www.mit.edu/~9.520/spring07/Classes/rlsslides.pdf" - }, - { - "name": "_BaseRidgeCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data. If using GCV, will be cast to float64 if necessary." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. Will be cast to X's dtype if necessary." - }, - { - "name": "sample_weight", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample. If given a float, every sample will have the same weight." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Ridge regression model with cv.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Training data. If using GCV, will be cast to float64\n if necessary.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to X's dtype if necessary.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\nReturns\n-------\nself : object\n\nNotes\n-----\nWhen sample_weight is provided, the selected hyperparameter may depend\non whether we use leave-one-out cross-validation (cv=None or cv='auto')\nor another form of cross-validation, because only leave-one-out\ncross-validation takes the sample weights into account when computing\nthe validation score." - } - ], - "docstring": null - }, - { - "name": "RidgeCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alphas", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of alpha values to try. Regularization strength; must be a positive float. Regularization improves the conditioning of the problem and reduces the variance of the estimates. Larger values specify stronger regularization. Alpha corresponds to ``1 / (2C)`` in other linear models such as :class:`~sklearn.linear_model.LogisticRegression` or :class:`~sklearn.svm.LinearSVC`. If using Leave-One-Out cross-validation, alphas must be positive." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." 
- }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. If None, the negative mean squared error if cv is 'auto' or None (i.e. when using leave-one-out cross-validation), and r2 score otherwise." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the efficient Leave-One-Out cross-validation - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if ``y`` is binary or multiclass, :class:`~sklearn.model_selection.StratifiedKFold` is used, else, :class:`~sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here." - }, - { - "name": "gcv_mode", - "type": "Literal['auto', 'svd']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Flag indicating which strategy to use when performing Leave-One-Out Cross-Validation. 
Options are:: 'auto' : use 'svd' if n_samples > n_features, otherwise use 'eigen' 'svd' : force use of singular value decomposition of X when X is dense, eigenvalue decomposition of X^T.X when X is sparse. 'eigen' : force computation via eigendecomposition of X.X^T The 'auto' mode is the default and is intended to pick the cheaper option of the two depending on the shape of the training data." - }, - { - "name": "store_cv_values", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Flag indicating if the cross-validation values corresponding to each alpha should be stored in the ``cv_values_`` attribute (see below). This flag is only compatible with ``cv=None`` (i.e. using Leave-One-Out Cross-Validation)." - }, - { - "name": "alpha_per_target", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Flag indicating whether to optimize the alpha value (picked from the `alphas` parameter list) for each target separately (for multi-output settings: multiple prediction targets). When set to `True`, after fitting, the `alpha_` attribute will contain a value for each target. When set to `False`, a single alpha is used for all targets. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Ridge regression with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nBy default, it performs efficient Leave-One-Out Cross-Validation.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalphas : ndarray of shape (n_alphas,), default=(0.1, 1.0, 10.0)\n Array of alpha values to try.\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. 
Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`.\n If using Leave-One-Out cross-validation, alphas must be positive.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nscoring : string, callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n If None, the negative mean squared error if cv is 'auto' or None\n (i.e. when using leave-one-out cross-validation), and r2 score\n otherwise.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the efficient Leave-One-Out cross-validation\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if ``y`` is binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used, else,\n :class:`~sklearn.model_selection.KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\ngcv_mode : {'auto', 'svd', eigen'}, default='auto'\n Flag indicating which strategy to use when performing\n Leave-One-Out Cross-Validation. 
Options are::\n\n 'auto' : use 'svd' if n_samples > n_features, otherwise use 'eigen'\n 'svd' : force use of singular value decomposition of X when X is\n dense, eigenvalue decomposition of X^T.X when X is sparse.\n 'eigen' : force computation via eigendecomposition of X.X^T\n\n The 'auto' mode is the default and is intended to pick the cheaper\n option of the two depending on the shape of the training data.\n\nstore_cv_values : bool, default=False\n Flag indicating if the cross-validation values corresponding to\n each alpha should be stored in the ``cv_values_`` attribute (see\n below). This flag is only compatible with ``cv=None`` (i.e. using\n Leave-One-Out Cross-Validation).\n\nalpha_per_target : bool, default=False\n Flag indicating whether to optimize the alpha value (picked from the\n `alphas` parameter list) for each target separately (for multi-output\n settings: multiple prediction targets). When set to `True`, after\n fitting, the `alpha_` attribute will contain a value for each target.\n When set to `False`, a single alpha is used for all targets.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncv_values_ : ndarray of shape (n_samples, n_alphas) or shape (n_samples, n_targets, n_alphas), optional\n Cross-validation values for each alpha (only available if\n ``store_cv_values=True`` and ``cv=None``). After ``fit()`` has been\n called, this attribute will contain the mean squared errors\n (by default) or the values of the ``{loss,score}_func`` function\n (if provided in the constructor).\n\ncoef_ : ndarray of shape (n_features) or (n_targets, n_features)\n Weight vector(s).\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function. 
Set to 0.0 if\n ``fit_intercept = False``.\n\nalpha_ : float or ndarray of shape (n_targets,)\n Estimated regularization parameter, or, if ``alpha_per_target=True``,\n the estimated regularization parameter for each target.\n\nbest_score_ : float or ndarray of shape (n_targets,)\n Score of base estimator with best alpha, or, if\n ``alpha_per_target=True``, a score for each target.\n\n .. versionadded:: 0.23\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.linear_model import RidgeCV\n>>> X, y = load_diabetes(return_X_y=True)\n>>> clf = RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)\n>>> clf.score(X, y)\n0.5166...\n\nSee Also\n--------\nRidge : Ridge regression.\nRidgeClassifier : Ridge classifier.\nRidgeClassifierCV : Ridge classifier with built-in cross validation." - }, - { - "name": "RidgeClassifierCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alphas", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of alpha values to try. Regularization strength; must be a positive float. Regularization improves the conditioning of the problem and reduces the variance of the estimates. Larger values specify stronger regularization. Alpha corresponds to ``1 / (2C)`` in other linear models such as :class:`~sklearn.linear_model.LogisticRegression` or :class:`~sklearn.svm.LinearSVC`." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered)." 
- }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the efficient Leave-One-Out cross-validation - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. 
The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``" - }, - { - "name": "store_cv_values", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Flag indicating if the cross-validation values corresponding to each alpha should be stored in the ``cv_values_`` attribute (see below). This flag is only compatible with ``cv=None`` (i.e. using Leave-One-Out Cross-Validation)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features. When using GCV, will be cast to float64 if necessary." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. Will be cast to X's dtype if necessary." - }, - { - "name": "sample_weight", - "type": "Union[NDArray, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample. If given a float, every sample will have the same weight." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit Ridge classifier with cv.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples\n and n_features is the number of features. When using GCV,\n will be cast to float64 if necessary.\n\ny : ndarray of shape (n_samples,)\n Target values. 
Will be cast to X's dtype if necessary.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\nReturns\n-------\nself : object" - }, - { - "name": "classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Ridge classifier with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nBy default, it performs Leave-One-Out Cross-Validation. Currently,\nonly the n_features > n_samples case is handled efficiently.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalphas : ndarray of shape (n_alphas,), default=(0.1, 1.0, 10.0)\n Array of alpha values to try.\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nscoring : string, callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the efficient Leave-One-Out cross-validation\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\nstore_cv_values : bool, default=False\n Flag indicating if the cross-validation values corresponding to\n each alpha should be stored in the ``cv_values_`` attribute (see\n below). This flag is only compatible with ``cv=None`` (i.e. using\n Leave-One-Out Cross-Validation).\n\nAttributes\n----------\ncv_values_ : ndarray of shape (n_samples, n_targets, n_alphas), optional\n Cross-validation values for each alpha (if ``store_cv_values=True`` and\n ``cv=None``). 
After ``fit()`` has been called, this attribute will\n contain the mean squared errors (by default) or the values of the\n ``{loss,score}_func`` function (if provided in the constructor). This\n attribute exists only when ``store_cv_values`` is True.\n\ncoef_ : ndarray of shape (1, n_features) or (n_targets, n_features)\n Coefficient of the features in the decision function.\n\n ``coef_`` is of shape (1, n_features) when the given problem is binary.\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\nalpha_ : float\n Estimated regularization parameter.\n\nbest_score_ : float\n Score of base estimator with best alpha.\n\n .. versionadded:: 0.23\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.linear_model import RidgeClassifierCV\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> clf = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)\n>>> clf.score(X, y)\n0.9630...\n\nSee Also\n--------\nRidge : Ridge regression.\nRidgeClassifier : Ridge classifier.\nRidgeCV : Ridge regression with built-in cross validation.\n\nNotes\n-----\nFor multi-class classification, n_class classifiers are trained in\na one-versus-all approach. Concretely, this is implemented by taking\nadvantage of the multi-variate response support in Ridge." 
- } - ], - "functions": [ - { - "name": "_solve_sparse_cg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_solve_lsqr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_solve_cholesky", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_solve_cholesky_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_solve_svd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_valid_accept_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "ridge_regression", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "alpha", - "type": "Union[ArrayLike, float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Regularization strength; must be a positive float. Regularization improves the conditioning of the problem and reduces the variance of the estimates. Larger values specify stronger regularization. Alpha corresponds to ``1 / (2C)`` in other linear models such as :class:`~sklearn.linear_model.LogisticRegression` or :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are assumed to be specific to the targets. Hence they must correspond in number." 
- }, - { - "name": "sample_weight", - "type": "Union[ArrayLike, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample. If given a float, every sample will have the same weight. If sample_weight is not None and solver='auto', the solver will be set to 'cholesky'. .. versionadded:: 0.17" - }, - { - "name": "solver", - "type": "Literal['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Solver to use in the computational routines: - 'auto' chooses the solver automatically based on the type of data. - 'svd' uses a Singular Value Decomposition of X to compute the Ridge coefficients. More stable for singular matrices than 'cholesky'. - 'cholesky' uses the standard scipy.linalg.solve function to obtain a closed-form solution via a Cholesky decomposition of dot(X.T, X) - 'sparse_cg' uses the conjugate gradient solver as found in scipy.sparse.linalg.cg. As an iterative algorithm, this solver is more appropriate than 'cholesky' for large-scale data (possibility to set `tol` and `max_iter`). - 'lsqr' uses the dedicated regularized least-squares routine scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative procedure. - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses its improved, unbiased version named SAGA. Both methods also use an iterative procedure, and are often faster than other solvers when both n_samples and n_features are large. Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. You can preprocess the data with a scaler from sklearn.preprocessing. All last five solvers support both dense and sparse data. However, only 'sag' and 'sparse_cg' supports sparse input when `fit_intercept` is True. .. versionadded:: 0.17 Stochastic Average Gradient descent solver. .. 
versionadded:: 0.19 SAGA solver." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for conjugate gradient solver. For the 'sparse_cg' and 'lsqr' solvers, the default value is determined by scipy.sparse.linalg. For 'sag' and saga solver, the default value is 1000." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Precision of the solution." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level. Setting verbose > 0 will display additional information depending on the solver used." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``solver`` == 'sag' or 'saga' to shuffle the data. See :term:`Glossary ` for details." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the method also returns `n_iter`, the actual number of iteration performed by the solver. .. versionadded:: 0.17" - }, - { - "name": "return_intercept", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True and if X is sparse, the method also returns the intercept, and the solver is automatically changed to 'sag'. This is only a temporary fix for fitting the intercept with sparse data. For dense data, use sklearn.linear_model._preprocess_data before your regression. .. 
versionadded:: 0.17" - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, the input arrays X and y will not be checked. .. versionadded:: 0.21" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Solve the ridge equation by the method of normal equations.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {ndarray, sparse matrix, LinearOperator} of shape (n_samples, n_features)\n Training data\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\nalpha : float or array-like of shape (n_targets,)\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n assumed to be specific to the targets. Hence they must correspond in\n number.\n\nsample_weight : float or array-like of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight. If sample_weight is not None and\n solver='auto', the solver will be set to 'cholesky'.\n\n .. versionadded:: 0.17\n\nsolver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, default='auto'\n Solver to use in the computational routines:\n\n - 'auto' chooses the solver automatically based on the type of data.\n\n - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. 
More stable for singular matrices than 'cholesky'.\n\n - 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution via a Cholesky decomposition of\n dot(X.T, X)\n\n - 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n - 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n procedure.\n\n - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its improved, unbiased version named SAGA. Both methods also use an\n iterative procedure, and are often faster than other solvers when\n both n_samples and n_features are large. Note that 'sag' and\n 'saga' fast convergence is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\n\n All last five solvers support both dense and sparse data. However, only\n 'sag' and 'sparse_cg' supports sparse input when `fit_intercept` is\n True.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n\nmax_iter : int, default=None\n Maximum number of iterations for conjugate gradient solver.\n For the 'sparse_cg' and 'lsqr' solvers, the default value is determined\n by scipy.sparse.linalg. For 'sag' and saga solver, the default value is\n 1000.\n\ntol : float, default=1e-3\n Precision of the solution.\n\nverbose : int, default=0\n Verbosity level. 
Setting verbose > 0 will display additional\n information depending on the solver used.\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n See :term:`Glossary ` for details.\n\nreturn_n_iter : bool, default=False\n If True, the method also returns `n_iter`, the actual number of\n iteration performed by the solver.\n\n .. versionadded:: 0.17\n\nreturn_intercept : bool, default=False\n If True and if X is sparse, the method also returns the intercept,\n and the solver is automatically changed to 'sag'. This is only a\n temporary fix for fitting the intercept with sparse data. For dense\n data, use sklearn.linear_model._preprocess_data before your regression.\n\n .. versionadded:: 0.17\n\ncheck_input : bool, default=True\n If False, the input arrays X and y will not be checked.\n\n .. versionadded:: 0.21\n\nReturns\n-------\ncoef : ndarray of shape (n_features,) or (n_targets, n_features)\n Weight vector(s).\n\nn_iter : int, optional\n The actual number of iteration performed by the solver.\n Only returned if `return_n_iter` is True.\n\nintercept : float or ndarray of shape (n_targets,)\n The intercept of the model. Only returned if `return_intercept`\n is True and if X is a scipy sparse array.\n\nNotes\n-----\nThis function won't compute the intercept." - }, - { - "name": "_ridge_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_gcv_mode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_find_smallest_angle", - "decorators": [], - "parameters": [ - { - "name": "query", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Normalized query vector." 
- }, - { - "name": "vectors", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Vectors to which we compare query, as columns. Must be normalized." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the column of vectors that is most aligned with the query.\n\nBoth query and the columns of vectors must have their l2 norm equal to 1.\n\nParameters\n----------\nquery : ndarray of shape (n_samples,)\n Normalized query vector.\n\nvectors : ndarray of shape (n_samples, n_features)\n Vectors to which we compare query, as columns. Must be normalized." - } - ] - }, - { - "name": "sklearn.linear_model._sag", - "imports": [ - "import warnings", - "import numpy as np", - "from _base import make_dataset", - "from _sag_fast import sag32", - "from _sag_fast import sag64", - "from exceptions import ConvergenceWarning", - "from utils import check_array", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from utils.extmath import row_norms" - ], - "classes": [], - "functions": [ - { - "name": "get_auto_step_size", - "decorators": [], - "parameters": [ - { - "name": "max_squared_sum", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum squared sum of X over samples." - }, - { - "name": "alpha_scaled", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the regularization term, scaled by 1. / n_samples, the number of samples." - }, - { - "name": "loss", - "type": "Literal['log', 'squared', 'multinomial']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The loss function used in SAG solver." 
- }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies if a constant (a.k.a. bias or intercept) will be added to the decision function." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of rows in X. Useful if is_saga=True." - }, - { - "name": "is_saga", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return step size for the SAGA algorithm or the SAG algorithm." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute automatic step size for SAG solver.\n\nThe step size is set to 1 / (alpha_scaled + L + fit_intercept) where L is\nthe max sum of squares for over all samples.\n\nParameters\n----------\nmax_squared_sum : float\n Maximum squared sum of X over samples.\n\nalpha_scaled : float\n Constant that multiplies the regularization term, scaled by\n 1. / n_samples, the number of samples.\n\nloss : {'log', 'squared', 'multinomial'}\n The loss function used in SAG solver.\n\nfit_intercept : bool\n Specifies if a constant (a.k.a. bias or intercept) will be\n added to the decision function.\n\nn_samples : int, default=None\n Number of rows in X. Useful if is_saga=True.\n\nis_saga : bool, default=False\n Whether to return step size for the SAGA algorithm or the SAG\n algorithm.\n\nReturns\n-------\nstep_size : float\n Step size used in SAG solver.\n\nReferences\n----------\nSchmidt, M., Roux, N. L., & Bach, F. (2013).\nMinimizing finite sums with the stochastic average gradient\nhttps://hal.inria.fr/hal-00860051/document\n\nDefazio, A., Bach F. & Lacoste-Julien S. 
(2014).\nSAGA: A Fast Incremental Gradient Method With Support\nfor Non-Strongly Convex Composite Objectives\nhttps://arxiv.org/abs/1407.0202" - }, - { - "name": "sag_solver", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. With loss='multinomial', y must be label encoded (see preprocessing.LabelEncoder)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted)." - }, - { - "name": "loss", - "type": "Literal['log', 'squared', 'multinomial']", - "hasDefault": true, - "default": "'log'", - "limitation": null, - "ignored": false, - "docstring": "Loss function that will be optimized: -'log' is the binary logistic loss, as used in LogisticRegression. -'squared' is the squared loss, as used in Ridge. -'multinomial' is the multinomial logistic loss, as used in LogisticRegression. .. versionadded:: 0.18 *loss='multinomial'*" - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "L2 regularization term in the objective function ``(0.5 * alpha * || W ||_F^2)``." - }, - { - "name": "beta", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L1 regularization term in the objective function ``(beta * || W ||_1)``. Only applied if ``is_saga`` is set to True." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The max number of passes over the training data if the stopping criteria is not reached." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The stopping criteria for the weights. The iterations will stop when max(change in weights) / max(weights) < tol." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when shuffling the data. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, the input arrays X and y will not be checked." - }, - { - "name": "max_squared_sum", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum squared sum of X over samples. If None, it will be computed, going through all the samples. The value should be precomputed to speed up cross validation." - }, - { - "name": "warm_start_mem", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initialization parameters used for warm starting. Warm starting is currently used in LogisticRegression but not in Ridge. It contains: - 'coef': the weight vector, with the intercept in last line if the intercept is fitted. - 'gradient_memory': the scalar gradient for all seen samples. 
- 'sum_gradient': the sum of gradient over all seen samples, for each feature. - 'intercept_sum_gradient': the sum of gradient over all seen samples, for the intercept. - 'seen': array of boolean describing the seen samples. - 'num_seen': the number of seen samples." - }, - { - "name": "is_saga", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use the SAGA algorithm or the SAG algorithm. SAGA behaves better in the first epochs, and allow for l1 regularisation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "SAG solver for Ridge and LogisticRegression.\n\nSAG stands for Stochastic Average Gradient: the gradient of the loss is\nestimated each sample at a time and the model is updated along the way with\na constant learning rate.\n\nIMPORTANT NOTE: 'sag' solver converges faster on columns that are on the\nsame scale. You can normalize the data by using\nsklearn.preprocessing.StandardScaler on your data before passing it to the\nfit method.\n\nThis implementation works with data represented as dense numpy arrays or\nsparse scipy arrays of floating point values for the features. It will\nfit the data according to squared loss or log loss.\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using the squared euclidean norm L2.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Target values. With loss='multinomial', y must be label encoded\n (see preprocessing.LabelEncoder).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. 
for unweighted).\n\nloss : {'log', 'squared', 'multinomial'}, default='log'\n Loss function that will be optimized:\n -'log' is the binary logistic loss, as used in LogisticRegression.\n -'squared' is the squared loss, as used in Ridge.\n -'multinomial' is the multinomial logistic loss, as used in\n LogisticRegression.\n\n .. versionadded:: 0.18\n *loss='multinomial'*\n\nalpha : float, default=1.\n L2 regularization term in the objective function\n ``(0.5 * alpha * || W ||_F^2)``.\n\nbeta : float, default=0.\n L1 regularization term in the objective function\n ``(beta * || W ||_1)``. Only applied if ``is_saga`` is set to True.\n\nmax_iter : int, default=1000\n The max number of passes over the training data if the stopping\n criteria is not reached.\n\ntol : double, default=0.001\n The stopping criteria for the weights. The iterations will stop when\n max(change in weights) / max(weights) < tol.\n\nverbose : int, default=0\n The verbosity level.\n\nrandom_state : int, RandomState instance or None, default=None\n Used when shuffling the data. Pass an int for reproducible output\n across multiple function calls.\n See :term:`Glossary `.\n\ncheck_input : bool, default=True\n If False, the input arrays X and y will not be checked.\n\nmax_squared_sum : float, default=None\n Maximum squared sum of X over samples. If None, it will be computed,\n going through all the samples. The value should be precomputed\n to speed up cross validation.\n\nwarm_start_mem : dict, default=None\n The initialization parameters used for warm starting. 
Warm starting is\n currently used in LogisticRegression but not in Ridge.\n It contains:\n - 'coef': the weight vector, with the intercept in last line\n if the intercept is fitted.\n - 'gradient_memory': the scalar gradient for all seen samples.\n - 'sum_gradient': the sum of gradient over all seen samples,\n for each feature.\n - 'intercept_sum_gradient': the sum of gradient over all seen\n samples, for the intercept.\n - 'seen': array of boolean describing the seen samples.\n - 'num_seen': the number of seen samples.\n\nis_saga : bool, default=False\n Whether to use the SAGA algorithm or the SAG algorithm. SAGA behaves\n better in the first epochs, and allow for l1 regularisation.\n\nReturns\n-------\ncoef_ : ndarray of shape (n_features,)\n Weight vector.\n\nn_iter_ : int\n The number of full pass on all samples.\n\nwarm_start_mem : dict\n Contains a 'coef' key with the fitted result, and possibly the\n fitted intercept at the end of the array. Contains also other keys\n used for warm starting.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import linear_model\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> X = rng.randn(n_samples, n_features)\n>>> y = rng.randn(n_samples)\n>>> clf = linear_model.Ridge(solver='sag')\n>>> clf.fit(X, y)\nRidge(solver='sag')\n\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> y = np.array([1, 1, 2, 2])\n>>> clf = linear_model.LogisticRegression(\n... solver='sag', multi_class='multinomial')\n>>> clf.fit(X, y)\nLogisticRegression(multi_class='multinomial', solver='sag')\n\nReferences\n----------\nSchmidt, M., Roux, N. L., & Bach, F. (2013).\nMinimizing finite sums with the stochastic average gradient\nhttps://hal.inria.fr/hal-00860051/document\n\nDefazio, A., Bach F. & Lacoste-Julien S. 
(2014).\nSAGA: A Fast Incremental Gradient Method With Support\nfor Non-Strongly Convex Composite Objectives\nhttps://arxiv.org/abs/1407.0202\n\nSee Also\n--------\nRidge, SGDRegressor, ElasticNet, Lasso, SVR,\nLogisticRegression, SGDClassifier, LinearSVC, Perceptron" - } - ] - }, - { - "name": "sklearn.linear_model._stochastic_gradient", - "imports": [ - "import numpy as np", - "import warnings", - "from abc import ABCMeta", - "from abc import abstractmethod", - "from joblib import Parallel", - "from base import clone", - "from base import is_classifier", - "from _base import LinearClassifierMixin", - "from _base import SparseCoefMixin", - "from _base import make_dataset", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from utils import check_array", - "from utils import check_random_state", - "from utils import check_X_y", - "from utils.extmath import safe_sparse_dot", - "from utils.multiclass import _check_partial_fit_first_call", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from exceptions import ConvergenceWarning", - "from model_selection import StratifiedShuffleSplit", - "from model_selection import ShuffleSplit", - "from _sgd_fast import _plain_sgd", - "from utils import compute_class_weight", - "from _sgd_fast import Hinge", - "from _sgd_fast import SquaredHinge", - "from _sgd_fast import Log", - "from _sgd_fast import ModifiedHuber", - "from _sgd_fast import SquaredLoss", - "from _sgd_fast import Huber", - "from _sgd_fast import EpsilonInsensitive", - "from _sgd_fast import SquaredEpsilonInsensitive", - "from utils.fixes import _joblib_parallel_args", - "from utils import deprecated" - ], - "classes": [ - { - "name": "_ValidationScoreCallback", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Callback for early stopping based on validation score" - }, - { - "name": "BaseSGD", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [ - { - "name": "**kwargs", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator parameters." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Set and validate the parameters of estimator.\n\nParameters\n----------\n**kwargs : dict\n Estimator parameters.\n\nReturns\n-------\nself : object\n Estimator instance." - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit model." - }, - { - "name": "_validate_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate input params. " - }, - { - "name": "_get_loss_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get concrete ``LossFunction`` object for str ``loss``. 
" - }, - { - "name": "_get_learning_rate_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_penalty_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_allocate_parameter_mem", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Allocate mem for parameters; initialize if provided." - }, - { - "name": "_make_validation_split", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Split the dataset between training set and validation set.\n\nParameters\n----------\ny : ndarray of shape (n_samples, )\n Target values.\n\nReturns\n-------\nvalidation_mask : ndarray of shape (n_samples, )\n Equal to 1 on the validation set, 0 on the training set." - }, - { - "name": "_make_validation_score_cb", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "standard_coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "standard_intercept_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "average_coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "average_intercept_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for SGD classification and regression." 
- }, - { - "name": "BaseSGDClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit a binary classifier on X and y. " - }, - { - "name": "_fit_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit a multi-class classifier by combining binary classifiers\n\nEach binary classifier predicts one class versus all others. This\nstrategy is called OvA (One versus All) or OvR (One versus Rest)." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Subset of the training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Subset of the target values." - }, - { - "name": "classes", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Classes across all calls to partial_fit. Can be obtained by via `np.unique(y_all)`, where y_all is the target vector of the entire dataset. This argument is required for the first call to partial_fit and can be omitted in the subsequent calls. Note that y doesn't need to contain all labels in `classes`." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples. If not provided, uniform weights are assumed." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Perform one epoch of stochastic gradient descent on given samples.\n\nInternally, this method uses ``max_iter = 1``. Therefore, it is not\nguaranteed that a minimum of the cost function is reached after calling\nit once. Matters such as objective convergence and early stopping\nshould be handled by the user.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Subset of the training data.\n\ny : ndarray of shape (n_samples,)\n Subset of the target values.\n\nclasses : ndarray of shape (n_classes,), default=None\n Classes across all calls to partial_fit.\n Can be obtained by via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\nsample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples.\n If not provided, uniform weights are assumed.\n\nReturns\n-------\nself :\n Returns an instance of self." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." 
- }, - { - "name": "coef_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial coefficients to warm-start the optimization." - }, - { - "name": "intercept_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial intercept to warm-start the optimization." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples. If not provided, uniform weights are assumed. These weights will be multiplied with class_weight (passed through the constructor) if class_weight is specified." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit linear model with Stochastic Gradient Descent.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Target values.\n\ncoef_init : ndarray of shape (n_classes, n_features), default=None\n The initial coefficients to warm-start the optimization.\n\nintercept_init : ndarray of shape (n_classes,), default=None\n The initial intercept to warm-start the optimization.\n\nsample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples.\n If not provided, uniform weights are assumed. These weights will\n be multiplied with class_weight (passed through the\n constructor) if class_weight is specified.\n\nReturns\n-------\nself :\n Returns an instance of self." 
- } - ], - "docstring": null - }, - { - "name": "SGDClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "loss", - "type": "str", - "hasDefault": true, - "default": "'hinge'", - "limitation": null, - "ignored": false, - "docstring": "The loss function to be used. Defaults to 'hinge', which gives a linear SVM. The possible options are 'hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron', or a regression loss: 'squared_loss', 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'. The 'log' loss gives logistic regression, a probabilistic classifier. 'modified_huber' is another smooth loss that brings tolerance to outliers as well as probability estimates. 'squared_hinge' is like hinge but is quadratically penalized. 'perceptron' is the linear loss used by the perceptron algorithm. The other losses are designed for regression but can be useful in classification as well; see :class:`~sklearn.linear_model.SGDRegressor` for a description. More details about the losses formulas can be found in the :ref:`User Guide `." - }, - { - "name": "penalty", - "type": "Literal['l2', 'l1', 'elasticnet']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "The penalty (aka regularization term) to be used. Defaults to 'l2' which is the standard regularizer for linear SVM models. 'l1' and 'elasticnet' might bring sparsity to the model (feature selection) not achievable with 'l2'." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the regularization term. The higher the value, the stronger the regularization. Also used to compute the learning rate when set to `learning_rate` is set to 'optimal'." 
- }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1. l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1. Only used if `penalty` is 'elasticnet'." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the intercept should be estimated or not. If False, the data is assumed to be already centered." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the :meth:`partial_fit` method. .. versionadded:: 0.19" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "The stopping criterion. If it is not None, training will stop when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive epochs. .. versionadded:: 0.19" - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not the training data should be shuffled after each epoch." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Epsilon in the epsilon-insensitive loss functions; only if `loss` is 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'. For 'huber', determines the threshold at which it becomes less important to get the prediction exactly right. 
For epsilon-insensitive, any differences between the current prediction and the correct label are ignored if they are less than this threshold." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of CPUs to use to do the OVA (One Versus All, for multi-class problems) computation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for shuffling the data, when ``shuffle`` is set to ``True``. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "learning_rate", - "type": "str", - "hasDefault": true, - "default": "'optimal'", - "limitation": null, - "ignored": false, - "docstring": "The learning rate schedule: - 'constant': `eta = eta0` - 'optimal': `eta = 1.0 / (alpha * (t + t0))` where t0 is chosen by a heuristic proposed by Leon Bottou. - 'invscaling': `eta = eta0 / pow(t, power_t)` - 'adaptive': eta = eta0, as long as the training keeps decreasing. Each time n_iter_no_change consecutive epochs fail to decrease the training loss by tol or fail to increase validation score by tol if early_stopping is True, the current learning rate is divided by 5. .. versionadded:: 0.20 Added 'adaptive' option" - }, - { - "name": "eta0", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The initial learning rate for the 'constant', 'invscaling' or 'adaptive' schedules. The default value is 0.0 as eta0 is not used by the default schedule 'optimal'." 
- }, - { - "name": "power_t", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The exponent for inverse scaling learning rate [default 0.5]." - }, - { - "name": "early_stopping", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use early stopping to terminate training when validation score is not improving. If set to True, it will automatically set aside a stratified fraction of training data as validation and terminate training when validation score returned by the `score` method is not improving by at least tol for n_iter_no_change consecutive epochs. .. versionadded:: 0.20 Added 'early_stopping' option" - }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if `early_stopping` is True. .. versionadded:: 0.20 Added 'validation_fraction' option" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations with no improvement to wait before early stopping. .. versionadded:: 0.20 Added 'n_iter_no_change' option" - }, - { - "name": "class_weight", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Preset for the class_weight fit parameter. Weights associated with classes. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``." 
- }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `. Repeatedly calling fit or partial_fit when warm_start is True can result in a different solution than when calling fit a single time because of the way the data is shuffled. If a dynamic learning rate is used, the learning rate is adapted depending on the number of samples already seen. Calling ``fit`` resets this counter, while ``partial_fit`` will result in increasing the existing counter." - }, - { - "name": "average", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, computes the averaged SGD weights accross all updates and stores the result in the ``coef_`` attribute. If set to an int greater than 1, averaging will begin once the total number of samples seen reaches `average`. So ``average=10`` will begin averaging after seeing 10 samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data for prediction." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Probability estimates.\n\nThis method is only available for log loss and modified Huber loss.\n\nMulticlass probability estimates are derived from binary (one-vs.-rest)\nestimates by simple normalization, as recommended by Zadrozny and\nElkan.\n\nBinary probability estimates for loss=\"modified_huber\" are given by\n(clip(decision_function(X), -1, 1) + 1) / 2. For other loss functions\nit is necessary to perform proper probability calibration by wrapping\nthe classifier with\n:class:`~sklearn.calibration.CalibratedClassifierCV` instead.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data for prediction.\n\nReturns\n-------\nndarray of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in the model,\n where classes are ordered as they are in `self.classes_`.\n\nReferences\n----------\nZadrozny and Elkan, \"Transforming classifier scores into multiclass\nprobability estimates\", SIGKDD'02,\nhttp://www.research.ibm.com/people/z/zadrozny/kdd2002-Transf.pdf\n\nThe justification for the formula in the loss=\"modified_huber\"\ncase is in the appendix B in:\nhttp://jmlr.csail.mit.edu/papers/volume2/zhang02c/zhang02c.pdf" - }, - { - "name": "_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data for prediction." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Log of probability estimates.\n\nThis method is only available for log loss and modified Huber loss.\n\nWhen loss=\"modified_huber\", probability estimates may be hard zeros\nand ones, so taking the logarithm is not possible.\n\nSee ``predict_proba`` for details.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data for prediction.\n\nReturns\n-------\nT : array-like, shape (n_samples, n_classes)\n Returns the log-probability of the sample for each class in the\n model, where classes are ordered as they are in\n `self.classes_`." - }, - { - "name": "_predict_log_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Linear classifiers (SVM, logistic regression, etc.) with SGD training.\n\nThis estimator implements regularized linear models with stochastic\ngradient descent (SGD) learning: the gradient of the loss is estimated\neach sample at a time and the model is updated along the way with a\ndecreasing strength schedule (aka learning rate). SGD allows minibatch\n(online/out-of-core) learning via the `partial_fit` method.\nFor best results using the default learning rate schedule, the data should\nhave zero mean and unit variance.\n\nThis implementation works with data represented as dense or sparse arrays\nof floating point values for the features. The model it fits can be\ncontrolled with the loss parameter; by default, it fits a linear support\nvector machine (SVM).\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using either the squared euclidean norm\nL2 or the absolute norm L1 or a combination of both (Elastic Net). 
If the\nparameter update crosses the 0.0 value because of the regularizer, the\nupdate is truncated to 0.0 to allow for learning sparse models and achieve\nonline feature selection.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nloss : str, default='hinge'\n The loss function to be used. Defaults to 'hinge', which gives a\n linear SVM.\n\n The possible options are 'hinge', 'log', 'modified_huber',\n 'squared_hinge', 'perceptron', or a regression loss: 'squared_loss',\n 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n\n The 'log' loss gives logistic regression, a probabilistic classifier.\n 'modified_huber' is another smooth loss that brings tolerance to\n outliers as well as probability estimates.\n 'squared_hinge' is like hinge but is quadratically penalized.\n 'perceptron' is the linear loss used by the perceptron algorithm.\n The other losses are designed for regression but can be useful in\n classification as well; see\n :class:`~sklearn.linear_model.SGDRegressor` for a description.\n\n More details about the losses formulas can be found in the\n :ref:`User Guide `.\n\npenalty : {'l2', 'l1', 'elasticnet'}, default='l2'\n The penalty (aka regularization term) to be used. Defaults to 'l2'\n which is the standard regularizer for linear SVM models. 'l1' and\n 'elasticnet' might bring sparsity to the model (feature selection)\n not achievable with 'l2'.\n\nalpha : float, default=0.0001\n Constant that multiplies the regularization term. The higher the\n value, the stronger the regularization.\n Also used to compute the learning rate when set to `learning_rate` is\n set to 'optimal'.\n\nl1_ratio : float, default=0.15\n The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\n l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\n Only used if `penalty` is 'elasticnet'.\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. 
If False, the\n data is assumed to be already centered.\n\nmax_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\ntol : float, default=1e-3\n The stopping criterion. If it is not None, training will stop\n when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\n epochs.\n\n .. versionadded:: 0.19\n\nshuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n The verbosity level.\n\nepsilon : float, default=0.1\n Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n For 'huber', determines the threshold at which it becomes less\n important to get the prediction exactly right.\n For epsilon-insensitive, any differences between the current prediction\n and the correct label are ignored if they are less than this threshold.\n\nn_jobs : int, default=None\n The number of CPUs to use to do the OVA (One Versus All, for\n multi-class problems) computation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n Used for shuffling the data, when ``shuffle`` is set to ``True``.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nlearning_rate : str, default='optimal'\n The learning rate schedule:\n\n - 'constant': `eta = eta0`\n - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n where t0 is chosen by a heuristic proposed by Leon Bottou.\n - 'invscaling': `eta = eta0 / pow(t, power_t)`\n - 'adaptive': eta = eta0, as long as the training keeps decreasing.\n Each time n_iter_no_change consecutive epochs fail to decrease the\n training loss by tol or fail to increase validation score by tol if\n early_stopping is True, the current learning rate is divided by 5.\n\n .. versionadded:: 0.20\n Added 'adaptive' option\n\neta0 : double, default=0.0\n The initial learning rate for the 'constant', 'invscaling' or\n 'adaptive' schedules. The default value is 0.0 as eta0 is not used by\n the default schedule 'optimal'.\n\npower_t : double, default=0.5\n The exponent for inverse scaling learning rate [default 0.5].\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to True, it will automatically set aside\n a stratified fraction of training data as validation and terminate\n training when validation score returned by the `score` method is not\n improving by at least tol for n_iter_no_change consecutive epochs.\n\n .. versionadded:: 0.20\n Added 'early_stopping' option\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if `early_stopping` is True.\n\n .. versionadded:: 0.20\n Added 'validation_fraction' option\n\nn_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before early stopping.\n\n .. 
versionadded:: 0.20\n Added 'n_iter_no_change' option\n\nclass_weight : dict, {class_label: weight} or \"balanced\", default=None\n Preset for the class_weight fit parameter.\n\n Weights associated with classes. If not given, all classes\n are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\n Repeatedly calling fit or partial_fit when warm_start is True can\n result in a different solution than when calling fit a single time\n because of the way the data is shuffled.\n If a dynamic learning rate is used, the learning rate is adapted\n depending on the number of samples already seen. Calling ``fit`` resets\n this counter, while ``partial_fit`` will result in increasing the\n existing counter.\n\naverage : bool or int, default=False\n When set to True, computes the averaged SGD weights accross all\n updates and stores the result in the ``coef_`` attribute. If set to\n an int greater than 1, averaging will begin once the total number of\n samples seen reaches `average`. 
So ``average=10`` will begin\n averaging after seeing 10 samples.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features) if n_classes == 2 else (n_classes, n_features)\n Weights assigned to the features.\n\nintercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n Constants in decision function.\n\nn_iter_ : int\n The actual number of iterations before reaching the stopping criterion.\n For multiclass fits, it is the maximum over every binary fit.\n\nloss_function_ : concrete ``LossFunction``\n\nclasses_ : array of shape (n_classes,)\n\nt_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\nSee Also\n--------\nsklearn.svm.LinearSVC : Linear support vector classification.\nLogisticRegression : Logistic regression.\nPerceptron : Inherits from SGDClassifier. ``Perceptron()`` is equivalent to\n ``SGDClassifier(loss=\"perceptron\", eta0=1, learning_rate=\"constant\",\n penalty=None)``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import SGDClassifier\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.pipeline import make_pipeline\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> Y = np.array([1, 1, 2, 2])\n>>> # Always scale the input. The most convenient way is to use a pipeline.\n>>> clf = make_pipeline(StandardScaler(),\n... 
SGDClassifier(max_iter=1000, tol=1e-3))\n>>> clf.fit(X, Y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('sgdclassifier', SGDClassifier())])\n>>> print(clf.predict([[-0.8, -1]]))\n[1]" - }, - { - "name": "BaseSGDRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Subset of training data" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Subset of target values" - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples. If not provided, uniform weights are assumed." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Perform one epoch of stochastic gradient descent on given samples.\n\nInternally, this method uses ``max_iter = 1``. Therefore, it is not\nguaranteed that a minimum of the cost function is reached after calling\nit once. 
Matters such as objective convergence and early stopping\nshould be handled by the user.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Subset of training data\n\ny : numpy array of shape (n_samples,)\n Subset of target values\n\nsample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples.\n If not provided, uniform weights are assumed.\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "coef_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial coefficients to warm-start the optimization." - }, - { - "name": "intercept_init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The initial intercept to warm-start the optimization." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights applied to individual samples (1. for unweighted)." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit linear model with Stochastic Gradient Descent.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Training data\n\ny : ndarray of shape (n_samples,)\n Target values\n\ncoef_init : ndarray of shape (n_features,), default=None\n The initial coefficients to warm-start the optimization.\n\nintercept_init : ndarray of shape (1,), default=None\n The initial intercept to warm-start the optimization.\n\nsample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "_decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the linear model\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n\nReturns\n-------\nndarray of shape (n_samples,)\n Predicted target values per element in X." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the linear model\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n\nReturns\n-------\nndarray of shape (n_samples,)\n Predicted target values per element in X." 
- }, - { - "name": "_fit_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "SGDRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "loss", - "type": "str", - "hasDefault": true, - "default": "'squared_loss'", - "limitation": null, - "ignored": false, - "docstring": "The loss function to be used. The possible values are 'squared_loss', 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive' The 'squared_loss' refers to the ordinary least squares fit. 'huber' modifies 'squared_loss' to focus less on getting outliers correct by switching from squared to linear loss past a distance of epsilon. 'epsilon_insensitive' ignores errors less than epsilon and is linear past that; this is the loss function used in SVR. 'squared_epsilon_insensitive' is the same but becomes squared loss past a tolerance of epsilon. More details about the losses formulas can be found in the :ref:`User Guide `." - }, - { - "name": "penalty", - "type": "Literal['l2', 'l1', 'elasticnet']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "The penalty (aka regularization term) to be used. Defaults to 'l2' which is the standard regularizer for linear SVM models. 'l1' and 'elasticnet' might bring sparsity to the model (feature selection) not achievable with 'l2'." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the regularization term. The higher the value, the stronger the regularization. Also used to compute the learning rate when set to `learning_rate` is set to 'optimal'." 
- }, - { - "name": "l1_ratio", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1. l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1. Only used if `penalty` is 'elasticnet'." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether the intercept should be estimated or not. If False, the data is assumed to be already centered." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the :meth:`partial_fit` method. .. versionadded:: 0.19" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "The stopping criterion. If it is not None, training will stop when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive epochs. .. versionadded:: 0.19" - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not the training data should be shuffled after each epoch." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Epsilon in the epsilon-insensitive loss functions; only if `loss` is 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'. For 'huber', determines the threshold at which it becomes less important to get the prediction exactly right. 
For epsilon-insensitive, any differences between the current prediction and the correct label are ignored if they are less than this threshold." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used for shuffling the data, when ``shuffle`` is set to ``True``. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "learning_rate", - "type": "str", - "hasDefault": true, - "default": "'invscaling'", - "limitation": null, - "ignored": false, - "docstring": "The learning rate schedule: - 'constant': `eta = eta0` - 'optimal': `eta = 1.0 / (alpha * (t + t0))` where t0 is chosen by a heuristic proposed by Leon Bottou. - 'invscaling': `eta = eta0 / pow(t, power_t)` - 'adaptive': eta = eta0, as long as the training keeps decreasing. Each time n_iter_no_change consecutive epochs fail to decrease the training loss by tol or fail to increase validation score by tol if early_stopping is True, the current learning rate is divided by 5. .. versionadded:: 0.20 Added 'adaptive' option" - }, - { - "name": "eta0", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The initial learning rate for the 'constant', 'invscaling' or 'adaptive' schedules. The default value is 0.01." - }, - { - "name": "power_t", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The exponent for inverse scaling learning rate." - }, - { - "name": "early_stopping", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use early stopping to terminate training when validation score is not improving. 
If set to True, it will automatically set aside a fraction of training data as validation and terminate training when validation score returned by the `score` method is not improving by at least `tol` for `n_iter_no_change` consecutive epochs. .. versionadded:: 0.20 Added 'early_stopping' option" - }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if `early_stopping` is True. .. versionadded:: 0.20 Added 'validation_fraction' option" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations with no improvement to wait before early stopping. .. versionadded:: 0.20 Added 'n_iter_no_change' option" - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `. Repeatedly calling fit or partial_fit when warm_start is True can result in a different solution than when calling fit a single time because of the way the data is shuffled. If a dynamic learning rate is used, the learning rate is adapted depending on the number of samples already seen. Calling ``fit`` resets this counter, while ``partial_fit`` will result in increasing the existing counter." - }, - { - "name": "average", - "type": "Union[int, bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, computes the averaged SGD weights accross all updates and stores the result in the ``coef_`` attribute. 
If set to an int greater than 1, averaging will begin once the total number of samples seen reaches `average`. So ``average=10`` will begin averaging after seeing 10 samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Linear model fitted by minimizing a regularized empirical loss with SGD\n\nSGD stands for Stochastic Gradient Descent: the gradient of the loss is\nestimated each sample at a time and the model is updated along the way with\na decreasing strength schedule (aka learning rate).\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using either the squared euclidean norm\nL2 or the absolute norm L1 or a combination of both (Elastic Net). If the\nparameter update crosses the 0.0 value because of the regularizer, the\nupdate is truncated to 0.0 to allow for learning sparse models and achieve\nonline feature selection.\n\nThis implementation works with data represented as dense numpy arrays of\nfloating point values for the features.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nloss : str, default='squared_loss'\n The loss function to be used. The possible values are 'squared_loss',\n 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'\n\n The 'squared_loss' refers to the ordinary least squares fit.\n 'huber' modifies 'squared_loss' to focus less on getting outliers\n correct by switching from squared to linear loss past a distance of\n epsilon. 
'epsilon_insensitive' ignores errors less than epsilon and is\n linear past that; this is the loss function used in SVR.\n 'squared_epsilon_insensitive' is the same but becomes squared loss past\n a tolerance of epsilon.\n\n More details about the losses formulas can be found in the\n :ref:`User Guide `.\n\npenalty : {'l2', 'l1', 'elasticnet'}, default='l2'\n The penalty (aka regularization term) to be used. Defaults to 'l2'\n which is the standard regularizer for linear SVM models. 'l1' and\n 'elasticnet' might bring sparsity to the model (feature selection)\n not achievable with 'l2'.\n\nalpha : float, default=0.0001\n Constant that multiplies the regularization term. The higher the\n value, the stronger the regularization.\n Also used to compute the learning rate when set to `learning_rate` is\n set to 'optimal'.\n\nl1_ratio : float, default=0.15\n The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\n l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\n Only used if `penalty` is 'elasticnet'.\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If False, the\n data is assumed to be already centered.\n\nmax_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\ntol : float, default=1e-3\n The stopping criterion. If it is not None, training will stop\n when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\n epochs.\n\n .. 
versionadded:: 0.19\n\nshuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n The verbosity level.\n\nepsilon : float, default=0.1\n Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n For 'huber', determines the threshold at which it becomes less\n important to get the prediction exactly right.\n For epsilon-insensitive, any differences between the current prediction\n and the correct label are ignored if they are less than this threshold.\n\nrandom_state : int, RandomState instance, default=None\n Used for shuffling the data, when ``shuffle`` is set to ``True``.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nlearning_rate : string, default='invscaling'\n The learning rate schedule:\n\n - 'constant': `eta = eta0`\n - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n where t0 is chosen by a heuristic proposed by Leon Bottou.\n - 'invscaling': `eta = eta0 / pow(t, power_t)`\n - 'adaptive': eta = eta0, as long as the training keeps decreasing.\n Each time n_iter_no_change consecutive epochs fail to decrease the\n training loss by tol or fail to increase validation score by tol if\n early_stopping is True, the current learning rate is divided by 5.\n\n .. versionadded:: 0.20\n Added 'adaptive' option\n\neta0 : double, default=0.01\n The initial learning rate for the 'constant', 'invscaling' or\n 'adaptive' schedules. The default value is 0.01.\n\npower_t : double, default=0.25\n The exponent for inverse scaling learning rate.\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. 
If set to True, it will automatically set aside\n a fraction of training data as validation and terminate\n training when validation score returned by the `score` method is not\n improving by at least `tol` for `n_iter_no_change` consecutive\n epochs.\n\n .. versionadded:: 0.20\n Added 'early_stopping' option\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if `early_stopping` is True.\n\n .. versionadded:: 0.20\n Added 'validation_fraction' option\n\nn_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before early stopping.\n\n .. versionadded:: 0.20\n Added 'n_iter_no_change' option\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\n Repeatedly calling fit or partial_fit when warm_start is True can\n result in a different solution than when calling fit a single time\n because of the way the data is shuffled.\n If a dynamic learning rate is used, the learning rate is adapted\n depending on the number of samples already seen. Calling ``fit`` resets\n this counter, while ``partial_fit`` will result in increasing the\n existing counter.\n\naverage : bool or int, default=False\n When set to True, computes the averaged SGD weights accross all\n updates and stores the result in the ``coef_`` attribute. If set to\n an int greater than 1, averaging will begin once the total number of\n samples seen reaches `average`. So ``average=10`` will begin\n averaging after seeing 10 samples.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,)\n Weights assigned to the features.\n\nintercept_ : ndarray of shape (1,)\n The intercept term.\n\naverage_coef_ : ndarray of shape (n_features,)\n Averaged weights assigned to the features. 
Only available\n if ``average=True``.\n\n .. deprecated:: 0.23\n Attribute ``average_coef_`` was deprecated\n in version 0.23 and will be removed in 1.0 (renaming of 0.25).\n\naverage_intercept_ : ndarray of shape (1,)\n The averaged intercept term. Only available if ``average=True``.\n\n .. deprecated:: 0.23\n Attribute ``average_intercept_`` was deprecated\n in version 0.23 and will be removed in 1.0 (renaming of 0.25).\n\nn_iter_ : int\n The actual number of iterations before reaching the stopping criterion.\n\nt_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import SGDRegressor\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> # Always scale the input. The most convenient way is to use a pipeline.\n>>> reg = make_pipeline(StandardScaler(),\n... SGDRegressor(max_iter=1000, tol=1e-3))\n>>> reg.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('sgdregressor', SGDRegressor())])\n\nSee Also\n--------\nRidge, ElasticNet, Lasso, sklearn.svm.SVR" - } - ], - "functions": [ - { - "name": "_prepare_fit_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialization for fit_binary.\n\nReturns y, coef, intercept, average_coef, average_intercept." 
- }, - { - "name": "fit_binary", - "decorators": [], - "parameters": [ - { - "name": "est", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator to fit" - }, - { - "name": "i", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the positive class" - }, - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data" - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values" - }, - { - "name": "alpha", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The regularization parameter" - }, - { - "name": "C", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum step size for passive aggressive" - }, - { - "name": "learning_rate", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The learning rate. Accepted values are 'constant', 'optimal', 'invscaling', 'pa1' and 'pa2'." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations (epochs)" - }, - { - "name": "pos_weight", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weight of the positive class" - }, - { - "name": "neg_weight", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weight of the negative class" - }, - { - "name": "sample_weight", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The weight of each sample" - }, - { - "name": "validation_mask", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed validation mask in case _fit_binary is called in the context of a one-vs-rest reduction." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit a single binary classifier.\n\nThe i'th class is considered the \"positive\" class.\n\nParameters\n----------\nest : Estimator object\n The estimator to fit\n\ni : int\n Index of the positive class\n\nX : numpy array or sparse matrix of shape [n_samples,n_features]\n Training data\n\ny : numpy array of shape [n_samples, ]\n Target values\n\nalpha : float\n The regularization parameter\n\nC : float\n Maximum step size for passive aggressive\n\nlearning_rate : string\n The learning rate. 
Accepted values are 'constant', 'optimal',\n 'invscaling', 'pa1' and 'pa2'.\n\nmax_iter : int\n The maximum number of iterations (epochs)\n\npos_weight : float\n The weight of the positive class\n\nneg_weight : float\n The weight of the negative class\n\nsample_weight : numpy array of shape [n_samples, ]\n The weight of each sample\n\nvalidation_mask : numpy array of shape [n_samples, ], default=None\n Precomputed validation mask in case _fit_binary is called in the\n context of a one-vs-rest reduction.\n\nrandom_state : int, RandomState instance, default=None\n If int, random_state is the seed used by the random number generator;\n If RandomState instance, random_state is the random number generator;\n If None, the random number generator is the RandomState instance used\n by `np.random`." - } - ] - }, - { - "name": "sklearn.linear_model._theil_sen", - "imports": [ - "import warnings", - "from itertools import combinations", - "import numpy as np", - "from scipy import linalg", - "from scipy.special import binom", - "from scipy.linalg.lapack import get_lapack_funcs", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "from _base import LinearModel", - "from base import RegressorMixin", - "from utils import check_random_state", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from exceptions import ConvergenceWarning" - ], - "classes": [ - { - "name": "TheilSenRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations." 
- }, - { - "name": "copy_X", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, X will be copied; else, it may be overwritten." - }, - { - "name": "max_subpopulation", - "type": "int", - "hasDefault": true, - "default": "1e4", - "limitation": null, - "ignored": false, - "docstring": "Instead of computing with a set of cardinality 'n choose k', where n is the number of samples and k is the number of subsamples (at least number of features), consider only a stochastic subpopulation of a given maximal size if 'n choose k' is larger than max_subpopulation. For other than small problem sizes this parameter will determine memory usage and runtime if n_subsamples is not changed." - }, - { - "name": "n_subsamples", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to calculate the parameters. This is at least the number of features (plus 1 if fit_intercept=True) and the number of samples as a maximum. A lower number leads to a higher breakdown point and a low efficiency while a high number leads to a low breakdown point and a high efficiency. If None, take the minimum number of subsamples leading to maximal robustness. If n_subsamples is set to n_samples, Theil-Sen is identical to least squares." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for the calculation of spatial median." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Tolerance when calculating spatial median." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A random number generator instance to define the state of the random permutations generator. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `" - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CPUs to use during the cross validation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbose mode when fitting the model." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_subparams", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit linear model.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Training data.\ny : ndarray of shape (n_samples,)\n Target values.\n\nReturns\n-------\nself : returns an instance of self." - } - ], - "docstring": "Theil-Sen Estimator: robust multivariate regression model.\n\nThe algorithm calculates least square solutions on subsets with size\nn_subsamples of the samples in X. 
Any value of n_subsamples between the\nnumber of features and samples leads to an estimator with a compromise\nbetween robustness and efficiency. Since the number of least square\nsolutions is \"n_samples choose n_subsamples\", it can be extremely large\nand can therefore be limited with max_subpopulation. If this limit is\nreached, the subsets are chosen randomly. In a final step, the spatial\nmedian (or L1 median) is calculated of all least square solutions.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nmax_subpopulation : int, default=1e4\n Instead of computing with a set of cardinality 'n choose k', where n is\n the number of samples and k is the number of subsamples (at least\n number of features), consider only a stochastic subpopulation of a\n given maximal size if 'n choose k' is larger than max_subpopulation.\n For other than small problem sizes this parameter will determine\n memory usage and runtime if n_subsamples is not changed.\n\nn_subsamples : int, default=None\n Number of samples to calculate the parameters. This is at least the\n number of features (plus 1 if fit_intercept=True) and the number of\n samples as a maximum. A lower number leads to a higher breakdown\n point and a low efficiency while a high number leads to a low\n breakdown point and a high efficiency. 
If None, take the\n minimum number of subsamples leading to maximal robustness.\n If n_subsamples is set to n_samples, Theil-Sen is identical to least\n squares.\n\nmax_iter : int, default=300\n Maximum number of iterations for the calculation of spatial median.\n\ntol : float, default=1.e-3\n Tolerance when calculating spatial median.\n\nrandom_state : int, RandomState instance or None, default=None\n A random number generator instance to define the state of the random\n permutations generator. Pass an int for reproducible output across\n multiple function calls.\n See :term:`Glossary `\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool, default=False\n Verbose mode when fitting the model.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,)\n Coefficients of the regression model (median of distribution).\n\nintercept_ : float\n Estimated intercept of regression model.\n\nbreakdown_ : float\n Approximated breakdown point.\n\nn_iter_ : int\n Number of iterations needed for the spatial median.\n\nn_subpopulation_ : int\n Number of combinations taken into account from 'n choose k', where n is\n the number of samples and k is the number of subsamples.\n\nExamples\n--------\n>>> from sklearn.linear_model import TheilSenRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(\n... 
n_samples=200, n_features=2, noise=4.0, random_state=0)\n>>> reg = TheilSenRegressor(random_state=0).fit(X, y)\n>>> reg.score(X, y)\n0.9884...\n>>> reg.predict(X[:1,])\narray([-31.5871...])\n\nReferences\n----------\n- Theil-Sen Estimators in a Multiple Linear Regression Model, 2009\n Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang\n http://home.olemiss.edu/~xdang/papers/MTSE.pdf" - } - ], - "functions": [ - { - "name": "_modified_weiszfeld_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "x_old", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Current start vector." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Modified Weiszfeld step.\n\nThis function defines one iteration step in order to approximate the\nspatial median (L1 median). It is a form of an iteratively re-weighted\nleast squares method.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\nx_old : ndarray of shape = (n_features,)\n Current start vector.\n\nReturns\n-------\nx_new : ndarray of shape (n_features,)\n New iteration step.\n\nReferences\n----------\n- On Computation of Spatial Median for Robust Data Mining, 2005\n T. K\u00e4rkk\u00e4inen and S. 
\u00c4yr\u00e4m\u00f6\n http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf" - }, - { - "name": "_spatial_median", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Stop the algorithm if spatial_median has converged." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Spatial median (L1 median).\n\nThe spatial median is member of a class of so-called M-estimators which\nare defined by an optimization problem. Given a number of p points in an\nn-dimensional space, the point x minimizing the sum of all distances to the\np other points is called spatial median.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\nmax_iter : int, default=300\n Maximum number of iterations.\n\ntol : float, default=1.e-3\n Stop the algorithm if spatial_median has converged.\n\nReturns\n-------\nspatial_median : ndarray of shape = (n_features,)\n Spatial median.\n\nn_iter : int\n Number of iterations needed.\n\nReferences\n----------\n- On Computation of Spatial Median for Robust Data Mining, 2005\n T. K\u00e4rkk\u00e4inen and S. 
\u00c4yr\u00e4m\u00f6\n http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf" - }, - { - "name": "_breakdown_point", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples." - }, - { - "name": "n_subsamples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of subsamples to consider." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Approximation of the breakdown point.\n\nParameters\n----------\nn_samples : int\n Number of samples.\n\nn_subsamples : int\n Number of subsamples to consider.\n\nReturns\n-------\nbreakdown_point : float\n Approximation of breakdown point." - }, - { - "name": "_lstsq", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Design matrix, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector, where n_samples is the number of samples." - }, - { - "name": "indices", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of all subsamples with respect to the chosen subpopulation." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fit intercept or not." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Least Squares Estimator for TheilSenRegressor class.\n\nThis function calculates the least squares method on a subset of rows of X\nand y defined by the indices array. 
Optionally, an intercept column is\nadded if intercept is set to true.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Design matrix, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : ndarray of shape (n_samples,)\n Target vector, where n_samples is the number of samples.\n\nindices : ndarray of shape (n_subpopulation, n_subsamples)\n Indices of all subsamples with respect to the chosen subpopulation.\n\nfit_intercept : bool\n Fit intercept or not.\n\nReturns\n-------\nweights : ndarray of shape (n_subpopulation, n_features + intercept)\n Solution matrix of n_subpopulation solved least square problems." - } - ] - }, - { - "name": "sklearn.linear_model", - "imports": [ - "from _base import LinearRegression", - "from _bayes import BayesianRidge", - "from _bayes import ARDRegression", - "from _least_angle import Lars", - "from _least_angle import LassoLars", - "from _least_angle import lars_path", - "from _least_angle import lars_path_gram", - "from _least_angle import LarsCV", - "from _least_angle import LassoLarsCV", - "from _least_angle import LassoLarsIC", - "from _coordinate_descent import Lasso", - "from _coordinate_descent import ElasticNet", - "from _coordinate_descent import LassoCV", - "from _coordinate_descent import ElasticNetCV", - "from _coordinate_descent import lasso_path", - "from _coordinate_descent import enet_path", - "from _coordinate_descent import MultiTaskLasso", - "from _coordinate_descent import MultiTaskElasticNet", - "from _coordinate_descent import MultiTaskElasticNetCV", - "from _coordinate_descent import MultiTaskLassoCV", - "from _glm import PoissonRegressor", - "from _glm import GammaRegressor", - "from _glm import TweedieRegressor", - "from _huber import HuberRegressor", - "from _sgd_fast import Hinge", - "from _sgd_fast import Log", - "from _sgd_fast import ModifiedHuber", - "from _sgd_fast import SquaredLoss", - "from _sgd_fast import Huber", - "from 
_stochastic_gradient import SGDClassifier", - "from _stochastic_gradient import SGDRegressor", - "from _ridge import Ridge", - "from _ridge import RidgeCV", - "from _ridge import RidgeClassifier", - "from _ridge import RidgeClassifierCV", - "from _ridge import ridge_regression", - "from _logistic import LogisticRegression", - "from _logistic import LogisticRegressionCV", - "from _omp import orthogonal_mp", - "from _omp import orthogonal_mp_gram", - "from _omp import OrthogonalMatchingPursuit", - "from _omp import OrthogonalMatchingPursuitCV", - "from _passive_aggressive import PassiveAggressiveClassifier", - "from _passive_aggressive import PassiveAggressiveRegressor", - "from _perceptron import Perceptron", - "from _ransac import RANSACRegressor", - "from _theil_sen import TheilSenRegressor" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.linear_model.tests.test_base", - "imports": [ - "import pytest", - "import numpy as np", - "from scipy import sparse", - "from scipy import linalg", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils import check_random_state", - "from sklearn.utils.fixes import parse_version", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model._base import _preprocess_data", - "from sklearn.linear_model._base import _rescale_data", - "from sklearn.linear_model._base import make_dataset", - "from sklearn.datasets import make_sparse_uncorrelated", - "from sklearn.datasets import make_regression", - "from sklearn.datasets import load_iris" - ], - "classes": [], - "functions": [ - { - "name": "test_linear_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_sample_weights", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises_value_error_if_positive_and_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises_value_error_if_sample_weights_greater_than_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_sparse_equal_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_multiple_outcome", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_sparse_multiple_outcome", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_positive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_positive_multiple_outcome", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_positive_vs_nonpositive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_regression_positive_vs_nonpositive_when_positive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", 
- "docstring": null - }, - { - "name": "test_linear_regression_pd_sparse_dataframe_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_preprocess_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_preprocess_data_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_preprocess_data_weighted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_preprocess_data_with_return_mean", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_csr_preprocess_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_preprocess_copy_data_no_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dtype_preprocess_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rescale_data_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fused_types_make_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_bayes", - "imports": [ - "from math import log", - "import numpy as np", - "from scipy.linalg import pinvh", - "import pytest", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing 
import assert_array_less", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils import check_random_state", - "from sklearn.linear_model import BayesianRidge", - "from sklearn.linear_model import ARDRegression", - "from sklearn.linear_model import Ridge", - "from sklearn import datasets", - "from sklearn.utils.extmath import fast_logdet" - ], - "classes": [], - "functions": [ - { - "name": "test_n_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check value of n_iter." - }, - { - "name": "test_bayesian_ridge_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check scores attribute shape" - }, - { - "name": "test_bayesian_ridge_score_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check value of score on toy example.\n\nCompute log marginal likelihood with equation (36) in Sparse Bayesian\nLearning and the Relevance Vector Machine (Tipping, 2001):\n\n- 0.5 * (log |Id/alpha + X.X^T/lambda| +\n y^T.(Id/alpha + X.X^T/lambda).y + n * log(2 * pi))\n+ lambda_1 * log(lambda) - lambda_2 * lambda\n+ alpha_1 * log(alpha) - alpha_2 * alpha\n\nand check equality with the score computed during training." 
- }, - { - "name": "test_bayesian_ridge_parameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_toy_bayesian_ridge_object", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_initial_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prediction_bayesian_ridge_ard_with_constant_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_std_bayesian_ridge_ard_with_constant_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_update_of_sigma_in_ard", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_toy_ard_object", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ard_accuracy_on_easy_problem", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_return_std", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_update_sigma", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ard_regression_predict_normalize_true", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that we can predict with `normalize=True` 
and `return_std=True`.\nNon-regression test for:\nhttps://github.com/scikit-learn/scikit-learn/issues/18605" - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_coordinate_descent", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy import interpolate", - "from scipy import sparse", - "from copy import deepcopy", - "import joblib", - "from sklearn.base import is_classifier", - "from sklearn.datasets import load_diabetes", - "from sklearn.datasets import make_regression", - "from sklearn.model_selection import train_test_split", - "from sklearn.pipeline import make_pipeline", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import TempMemmap", - "from sklearn.utils.fixes import parse_version", - "from sklearn.linear_model import ARDRegression", - "from sklearn.linear_model import BayesianRidge", - "from sklearn.linear_model import ElasticNet", - "from sklearn.linear_model import ElasticNetCV", - "from sklearn.linear_model import enet_path", - "from sklearn.linear_model import Lars", - "from sklearn.linear_model import lars_path", - "from sklearn.linear_model import Lasso", - "from sklearn.linear_model import LassoCV", - "from sklearn.linear_model import LassoLars", - "from sklearn.linear_model import LassoLarsCV", - "from sklearn.linear_model import LassoLarsIC", - "from 
sklearn.linear_model import lasso_path", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import MultiTaskElasticNet", - "from sklearn.linear_model import MultiTaskElasticNetCV", - "from sklearn.linear_model import MultiTaskLasso", - "from sklearn.linear_model import MultiTaskLassoCV", - "from sklearn.linear_model import OrthogonalMatchingPursuit", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model import RidgeClassifier", - "from sklearn.linear_model import RidgeCV", - "from sklearn.linear_model._coordinate_descent import _set_order", - "from sklearn.utils import check_array", - "from sklearn.model_selection import ShuffleSplit", - "from sklearn import datasets" - ], - "classes": [], - "functions": [ - { - "name": "test_l1_ratio_param_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_order_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that _set_order returns arrays with promised order." - }, - { - "name": "test_set_order_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that _set_order returns sparse matrices in promised format." 
- }, - { - "name": "test_lasso_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "build_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "build an ill-posed linear regression problem with many noisy features and\ncomparatively few samples" - }, - { - "name": "test_lasso_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_cv_with_some_model_selection", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_cv_positive_constraint", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_model_pipeline_same_as_normalize_true", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_model_pipeline_same_dense_and_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_path_return_models_vs_new_return_gives_same_coefficients", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_path_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_alpha_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_positive_constraint", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_positive_constraint", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_cv_positive_constraint", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_uniform_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_task_lasso_and_enet", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_readonly_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_task_lasso_readonly_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_multitarget", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multioutput_enetcv_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multitask_enet_and_lasso_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_1d_multioutput_enet_and_multitask_enet_cv", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_1d_multioutput_lasso_and_multitask_lasso_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_input_dtype_enet_and_lassocv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precompute_invalid_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_convergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_convergence_with_regularizer_decrement", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_descent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_path_positive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_dense_descent_paths", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_input_false", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_copy_X_True", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_copy_X_False_check_input_False", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_overrided_gram_matrix", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_non_float_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_float_precision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_l1_ratio", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_coef_shape_not_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_multitask_lasso", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_coordinate_descent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that a warning is issued if model does not converge" - }, - { - "name": "test_convergence_warnings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_input_convergence_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lassoCV_does_not_set_precompute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_task_lasso_cv_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_sample_weight_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that the impact of sample_weight is consistent." 
- }, - { - "name": "test_enet_sample_weight_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_models_cv_fit_for_all_backends", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_huber", - "imports": [ - "import numpy as np", - "from scipy import optimize", - "from scipy import sparse", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.datasets import make_regression", - "from sklearn.linear_model import HuberRegressor", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import SGDRegressor", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model._huber import _huber_loss_and_gradient" - ], - "classes": [], - "functions": [ - { - "name": "make_regression_with_outliers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_equals_lr_for_high_epsilon", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_max_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_gradient", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "test_huber_scaling_invariant", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_and_sgd_same_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_better_r2_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huber_bool", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_least_angle", - "imports": [ - "import warnings", - "import numpy as np", - "import pytest", - "from scipy import linalg", - "from sklearn.base import clone", - "from sklearn.model_selection import train_test_split", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import TempMemmap", - "from sklearn.utils.fixes import np_version", - "from sklearn.utils.fixes import parse_version", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn import linear_model", - "from sklearn import datasets", - "from sklearn.linear_model._least_angle import _lars_path_residues", - "from sklearn.linear_model import LassoLarsIC", - "from sklearn.linear_model import lars_path", - "from sklearn.linear_model import Lars", - "from sklearn.linear_model import LassoLars", - "from io import StringIO", - "import sys" - ], - "classes": [], - "functions": [ - { - "name": "test_simple", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_simple_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_assert_same_lars_path_result", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_path_gram_equivalent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_x_none_gram_none_raises_value_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_all_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_lstsq", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_gives_lstsq_solution", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_collinearity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_path_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_path_all_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_precompute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_singular_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rank_deficient_design", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_vs_lasso_cd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_vs_lasso_cd_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_path_length", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_vs_lasso_cd_ill_conditioned", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_vs_lasso_cd_ill_conditioned2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_add_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_n_nonzero_coefs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multitarget", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_cv_max_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_ic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_path_readonly_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lars_path_positive_constraint", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimatorclasses_positive_constraint", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_vs_lasso_cd_positive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_vs_R_implementation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_lars_copyX_behaviour", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that user input regarding copy_X is not being overridden (it was until\nat least version 0.21)" - }, - { - "name": "test_lasso_lars_fit_copyX_behaviour", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that user input to .fit for copy_X overrides default __init__ value" - }, - { - "name": "test_lars_with_jitter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_X_none_gram_not_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_copy_X_with_auto_gram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_logistic", - "imports": [ - "import os", - "import sys", - "import warnings", - "import numpy as 
np", - "from numpy.testing import assert_allclose", - "from numpy.testing import assert_almost_equal", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal", - "import scipy.sparse as sp", - "from scipy import linalg", - "from scipy import optimize", - "from scipy import sparse", - "import pytest", - "from sklearn.base import clone", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_classification", - "from sklearn.metrics import log_loss", - "from sklearn.metrics import get_scorer", - "from sklearn.model_selection import StratifiedKFold", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.model_selection import train_test_split", - "from sklearn.model_selection import cross_val_score", - "from sklearn.preprocessing import LabelEncoder", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.utils import compute_class_weight", - "from sklearn.utils import _IS_32BIT", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils import shuffle", - "from sklearn.linear_model import SGDClassifier", - "from sklearn.preprocessing import scale", - "from sklearn.utils._testing import skip_if_no_parallel", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.linear_model._logistic import LogisticRegression", - "from sklearn.linear_model._logistic import _logistic_regression_path", - "from sklearn.linear_model._logistic import LogisticRegressionCV", - "from sklearn.linear_model._logistic import _logistic_loss_and_grad", - "from sklearn.linear_model._logistic import _logistic_grad_hess", - "from sklearn.linear_model._logistic import _multinomial_grad_hess", - "from sklearn.linear_model._logistic import 
_logistic_loss", - "from sklearn.linear_model._logistic import _log_reg_scoring_path" - ], - "classes": [], - "functions": [ - { - "name": "check_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that the model is able to fit the classification data" - }, - { - "name": "test_predict_2_classes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_cv_mock_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_cv_score_does_not_warn_by_default", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lr_liblinear_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_3_classes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_solver_option", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_binary_probabilities", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparsify", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inconsistent_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_write_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_consistency_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regression_path_convergence_fail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_liblinear_dual_random_state", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_loss_and_grad", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_grad_hess", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_cv_multinomial_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_logistic_regression_string_inputs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_logistic_cv_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_intercept_logistic_helper", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_multinomial_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regression_solvers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regression_solvers_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regressioncv_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regression_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_compute_class_weight_dictionary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regression_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regression_multinomial", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_grad_hess", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_liblinear_decision_function_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_liblinear_logregcv_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_saga_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logreg_intercept_scaling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logreg_intercept_scaling_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logreg_l1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logreg_l1_sparse_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regression_cv_refit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logreg_predict_proba_multinomial", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_saga_vs_liblinear", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dtype_match", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - 
{ - "name": "test_warm_start_converge_LR", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_elastic_net_coeffs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_elastic_net_l1_l2_equivalence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_elastic_net_vs_l1_l2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_LogisticRegression_elastic_net_objective", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_LogisticRegressionCV_GridSearchCV_elastic_net", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_LogisticRegressionCV_GridSearchCV_elastic_net_ovr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_LogisticRegressionCV_no_refit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_LogisticRegressionCV_elasticnet_attribute_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_l1_ratio_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_l1_ratios_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_elastic_net_versus_sgd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "test_logistic_regression_path_coefs_multinomial", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_regression_multi_class_auto", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_penalty_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logisticregression_liblinear_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scores_attribute_layout_elasticnet", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_identifiability_on_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that the multinomial classification is identifiable.\n\nA multinomial with c classes can be modeled with\nprobability_k = exp(X@coef_k) / sum(exp(X@coef_l), l=1..c) for k=1..c.\nThis is not identifiable, unless one chooses a further constraint.\nAccording to [1], the maximum of the L2 penalized likelihood automatically\nsatisfies the symmetric constraint:\nsum(coef_k, k=1..c) = 0\n\nFurther details can be found in the appendix of [2].\n\nReference\n---------\n.. [1] Zhu, Ji and Trevor J. Hastie. \"Classification of gene microarrays by\npenalized logistic regression\". Biostatistics 5 3 (2004): 427-43.\nhttps://doi.org/10.1093/biostatistics%2Fkxg046\n\n.. [2] Powers, Scott, Trevor J. Hastie and Robert Tibshirani. 
\"Nuclear\npenalized multinomial regression with an application to predicting at bat\noutcomes in baseball.\" Statistical modelling 18 5-6 (2017): 388-410 .\nhttps://arxiv.org/pdf/1706.10272.pdf" - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_omp", - "imports": [ - "import numpy as np", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.linear_model import orthogonal_mp", - "from sklearn.linear_model import orthogonal_mp_gram", - "from sklearn.linear_model import OrthogonalMatchingPursuit", - "from sklearn.linear_model import OrthogonalMatchingPursuitCV", - "from sklearn.linear_model import LinearRegression", - "from sklearn.utils import check_random_state", - "from sklearn.datasets import make_sparse_coded_signal" - ], - "classes": [], - "functions": [ - { - "name": "test_correct_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_correct_shapes_gram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_nonzero_coefs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tol", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_with_without_gram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_with_without_gram_tol", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unreachable_accuracy", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bad_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_perfect_signal_recovery", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_orthogonal_mp_gram_readonly", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_identical_regressors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_swapped_regressors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_atoms", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_omp_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_omp_return_path_prop_with_gram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_omp_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_omp_reaches_least_squares", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_passive_aggressive", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "import pytest", - "from sklearn.utils._testing import 
assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.base import ClassifierMixin", - "from sklearn.utils import check_random_state", - "from sklearn.datasets import load_iris", - "from sklearn.linear_model import PassiveAggressiveClassifier", - "from sklearn.linear_model import PassiveAggressiveRegressor" - ], - "classes": [ - { - "name": "MyPassiveAggressive", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "project", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_classifier_accuracy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_refit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_correctness", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_undefined_methods", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "test_partial_fit_weight_class_balanced", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_equal_class_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_class_weight_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_class_weight_format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_mse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_correctness", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_undefined_methods", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_passive_aggressive_deprecated_attr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_perceptron", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils import check_random_state", - "from sklearn.datasets import load_iris", - "from sklearn.linear_model import Perceptron" - ], - "classes": [ - { - "name": "MyPerceptron", - "decorators": [], - "superclasses": [], - 
"methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "project", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_perceptron_accuracy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_perceptron_correctness", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_undefined_methods", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_perceptron_l1_ratio", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that `l1_ratio` has an impact when `penalty='elasticnet'`" - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_ransac", - "imports": [ - "import numpy as np", - "from scipy import sparse", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_raises_regexp", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.datasets import make_regression", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import RANSACRegressor", - "from 
sklearn.linear_model import OrthogonalMatchingPursuit", - "from sklearn.linear_model._ransac import _dynamic_max_trials", - "from sklearn.exceptions import ConvergenceWarning" - ], - "classes": [], - "functions": [ - { - "name": "test_ransac_inliers_outliers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_is_data_valid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_is_model_valid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_max_trials", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_stop_n_inliers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_stop_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_resid_thresh_no_inliers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_no_valid_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_no_valid_model", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_exceed_max_skips", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_warn_exceed_max_skips", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_sparse_coo", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_sparse_csr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_sparse_csc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_none_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_min_n_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_multi_dimensional_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_residual_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_default_residual_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_dynamic_max_trials", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_fit_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ransac_final_model_fit_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_ridge", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "from scipy import linalg", - "from itertools import product", - "import pytest", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_warns", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn import datasets", - "from sklearn.metrics import mean_squared_error", - "from sklearn.metrics import make_scorer", - "from sklearn.metrics import get_scorer", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import ridge_regression", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model._ridge import _RidgeGCV", - "from sklearn.linear_model import RidgeCV", - "from sklearn.linear_model import RidgeClassifier", - "from sklearn.linear_model import RidgeClassifierCV", - "from sklearn.linear_model._ridge import _solve_cholesky", - "from sklearn.linear_model._ridge import _solve_cholesky_kernel", - "from sklearn.linear_model._ridge import _check_gcv_mode", - "from sklearn.linear_model._ridge import _X_CenterStackOp", - "from sklearn.datasets import make_regression", - "from sklearn.datasets import make_classification", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.model_selection import KFold", - "from sklearn.model_selection import GroupKFold", - "from sklearn.model_selection import cross_val_predict", - "from sklearn.model_selection import LeaveOneOut", - "from 
sklearn.utils import check_random_state", - "from sklearn.datasets import make_multilabel_classification" - ], - "classes": [], - "functions": [ - { - "name": "_accuracy_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_mean_squared_error_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_primal_dual_relationship", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_singular", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_regression_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_regression_convergence_fail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_toy_ridge_object", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_vs_lstsq", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_ridge_individual_penalties", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_X_CenterStackOp", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_gram", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_sparse_offset_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_solver_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_gcv_vs_ridge_loo_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_loo_cv_asym_scoring", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_gcv_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_gcv_mode_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_gcv_mode_choice", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_ridge_loo", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_ridge_cv_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_ridge_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_gcv_cv_values_not_stored", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_best_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_cv_individual_penalties", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_ridge_diabetes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_multi_ridge_diabetes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_ridge_classifiers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_classifier_with_scoring", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_regression_custom_scoring", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_tolerance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_dense_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dense_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_sparse_svd", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_weight_vs_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check class_weights resemble sample_weights behavior." - }, - { - "name": "test_class_weights_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridgecv_store_cv_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_classifier_cv_store_cv_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridgecv_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises_value_error_if_sample_weights_greater_than_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_design_with_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridgecv_int_alphas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridgecv_negative_alphas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises_value_error_if_solver_not_supported", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_cg_max_iter", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_fit_intercept_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_fit_intercept_sparse_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_fit_intercept_sparse_sag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_regression_check_arguments_validity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "check if all combinations of arguments give valid estimations" - }, - { - "name": "test_ridge_classifier_no_support_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dtype_match", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dtype_match_cholesky", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_regression_dtype_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ridge_sag_with_X_fortran", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_sag", - "imports": [ - "import math", - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "from scipy.special import 
logsumexp", - "from sklearn.linear_model._sag import get_auto_step_size", - "from sklearn.linear_model._sag_fast import _multinomial_grad_loss_all_samples", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model._base import make_dataset", - "from sklearn.linear_model._logistic import _multinomial_loss_grad", - "from sklearn.utils.extmath import row_norms", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils import compute_class_weight", - "from sklearn.utils import check_random_state", - "from sklearn.preprocessing import LabelEncoder", - "from sklearn.preprocessing import LabelBinarizer", - "from sklearn.datasets import make_blobs", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_classification", - "from sklearn.base import clone" - ], - "classes": [], - "functions": [ - { - "name": "log_dloss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "log_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "squared_dloss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "squared_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_pobj", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "sag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "sag_sparse", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_step_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_matching", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_matching", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sag_pobj_matches_logistic_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests if the sag pobj matches log reg" - }, - { - "name": "test_sag_pobj_matches_ridge_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests if the sag pobj matches ridge reg" - }, - { - "name": "test_sag_regressor_computed_correctly", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests if the sag regressor is computed correctly" - }, - { - "name": "test_get_auto_step_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sag_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests if the sag regressor performs well" - }, - { - "name": "test_sag_classifier_computed_correctly", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests if the binary classifier is computed correctly" - }, - { - "name": "test_sag_multiclass_computed_correctly", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests if the multiclass classifier is computed correctly" - }, - { - "name": 
"test_classifier_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests if classifier results match target" - }, - { - "name": "test_binary_classifier_class_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests binary classifier with classweights for each class" - }, - { - "name": "test_multiclass_classifier_class_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests multiclass with classweights for each class" - }, - { - "name": "test_classifier_single_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "tests if ValueError is thrown with only one class" - }, - { - "name": "test_step_size_alpha_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multinomial_loss_ground_truth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sag_classifier_raises_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_sgd", - "imports": [ - "import pickle", - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "import joblib", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regexp", - "from sklearn.utils._testing import 
assert_warns", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils.fixes import parse_version", - "from sklearn import linear_model", - "from sklearn import datasets", - "from sklearn import metrics", - "from sklearn.base import clone", - "from sklearn.base import is_classifier", - "from sklearn.preprocessing import LabelEncoder", - "from sklearn.preprocessing import scale", - "from sklearn.preprocessing import MinMaxScaler", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.model_selection import StratifiedShuffleSplit", - "from sklearn.model_selection import ShuffleSplit", - "from sklearn.linear_model import _sgd_fast as sgd_fast", - "from sklearn.model_selection import RandomizedSearchCV" - ], - "classes": [ - { - "name": "_SparseSGDClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "_SparseSGDRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "_update_kwargs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "SGDClassifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "SGDRegressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "SparseSGDClassifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "SparseSGDRegressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "asgd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_bad_alpha", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_bad_penalty", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_bad_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_input_format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plain_has_no_average_attr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_deprecated_attr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_late_onset_averaging_not_reached", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_late_onset_averaging_reached", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_bad_alpha_for_optimal_learning_rate", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_adaptive_longer_than_constant", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_validation_set_not_used_for_training", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_iter_no_change", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_not_enough_sample_for_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_clf", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_bad_l1_ratio", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_bad_learning_rate_schedule", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_bad_eta0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_max_iter_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_shuffle_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_early_stopping_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_validation_fraction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_n_iter_no_change", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_argument_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_provide_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_early_stopping_with_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_intercept_binary", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_average_binary_computed_correctly", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_intercept_to_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_at_least_two_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_weight_class_balanced", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_multiclass_average", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_multiclass_with_init_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_multiclass_njobs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_coef_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_predict_proba_method_access", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_l1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_equal_class_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_class_weight_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_class_weight_format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weights_multiplied", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_balanced_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_wrong_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_multiclass_average", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_then_partial_fit", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_equal_fit_classif", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_losses", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiple_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_reg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_averaged_computed_correctly", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_averaged_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_average_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_least_squares_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_epsilon_insensitive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_huber_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_elasticnet_convergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_equal_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_loss_function_epsilon", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_l1_ratio", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_underflow_or_overlow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_numerical_stability_large_gradient", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_large_regularization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tol_parameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_gradient_common", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_hinge", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_squared_hinge", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_log", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_squared_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_gradient_huber", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_modified_huber", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_epsilon_insensitive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_squared_epsilon_insensitive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_thread_multi_class_and_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_core_gridsearch_and_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_SGDClassifier_fit_for_all_backends", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_sparse_coordinate_descent", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_warns", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.linear_model import Lasso", - "from sklearn.linear_model import ElasticNet", - "from sklearn.linear_model import LassoCV", - "from sklearn.linear_model import ElasticNetCV" - ], - "classes": [], - "functions": [ - { - "name": "test_sparse_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_normalize_option", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lasso_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_toy_list_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_toy_explicit_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "make_sparse_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_sparse_enet_not_as_toy_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_enet_not_as_toy_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_lasso_not_as_toy_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_enet_multitarget", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_path_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_same_output_sparse_dense_lasso_and_enet_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_same_multiple_output_sparse_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_enet_coordinate_descent", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that a warning is issued if model does not converge" - } - ] - }, - { - "name": "sklearn.linear_model.tests.test_theil_sen", - "imports": [ - "import os", - "import sys", - "from contextlib import contextmanager", - "import numpy as np", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_array_less", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_warns", - "from scipy.linalg import norm", - "from scipy.optimize import fmin_bfgs", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import TheilSenRegressor", - "from sklearn.linear_model._theil_sen import _spatial_median", - "from sklearn.linear_model._theil_sen import _breakdown_point", - "from sklearn.linear_model._theil_sen import _modified_weiszfeld_step", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raises" - ], - "classes": [], - "functions": [ - { - "name": "no_stdout_stderr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "gen_toy_problem_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "gen_toy_problem_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "gen_toy_problem_4d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_modweiszfeld_step_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_modweiszfeld_step_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_spatial_median_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spatial_median_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_theil_sen_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_theil_sen_1d_no_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_theil_sen_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calc_breakdown_point", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checksubparams_negative_subpopulation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checksubparams_too_few_subsamples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checksubparams_too_many_subsamples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checksubparams_n_subsamples_if_less_samples_than_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_subpopulation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_subsamples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_verbosity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_theil_sen_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_less_samples_than_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.linear_model._glm.glm", - "imports": [ - "import numbers", - "import numpy as np", - "import scipy.optimize", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from utils import check_array", - "from utils import check_X_y", - "from utils.optimize import _check_optimize_result", - "from utils.validation import check_is_fitted", - "from utils.validation import _check_sample_weight", - "from _loss.glm_distribution import ExponentialDispersionModel", - "from _loss.glm_distribution import TweedieDistribution", - "from _loss.glm_distribution import EDM_DISTRIBUTIONS", - "from link import BaseLink", - "from link import IdentityLink", - "from link import LogLink" - ], - "classes": [ - { - "name": "GeneralizedLinearRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the penalty term and thus determines the regularization strength. ``alpha = 0`` is equivalent to unpenalized GLMs. In this case, the design matrix `X` must have full column rank (no collinearities)." 
- }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X @ coef + intercept)." - }, - { - "name": "family", - "type": "Literal['normal', 'poisson', 'gamma', 'inverse-gaussian']", - "hasDefault": true, - "default": "'normal'", - "limitation": null, - "ignored": false, - "docstring": "The distributional assumption of the GLM, i.e. which distribution from the EDM, specifies the loss function to be minimized." - }, - { - "name": "link", - "type": "Literal['auto', 'identity', 'log']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The link function of the GLM, i.e. mapping from linear predictor `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets the link depending on the chosen family as follows: - 'identity' for Normal distribution - 'log' for Poisson, Gamma and Inverse Gaussian distributions" - }, - { - "name": "solver", - "type": "Literal['lbfgs']", - "hasDefault": true, - "default": "'lbfgs'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm to use in the optimization problem: 'lbfgs' Calls scipy's L-BFGS-B optimizer." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximal number of iterations for the solver." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion. For the lbfgs solver, the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol`` where ``g_j`` is the j-th component of the gradient (derivative) of the objective function." 
- }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If set to ``True``, reuse the solution of the previous call to ``fit`` as initialization for ``coef_`` and ``intercept_``." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "For the lbfgs solver set verbose to any positive number for verbosity." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit a Generalized Linear Model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "_linear_predictor", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the linear_predictor = `X @ coef_ + intercept_`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\ny_pred : array of shape (n_samples,)\n Returns predicted values of linear predictor." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using GLM with feature matrix X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\ny_pred : array of shape (n_samples,)\n Returns predicted values." - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True values of target." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute D^2, the percentage of deviance explained.\n\nD^2 is a generalization of the coefficient of determination R^2.\nR^2 uses squared error and D^2 deviance. Note that those two are equal\nfor ``family='normal'``.\n\nD^2 is defined as\n:math:`D^2 = 1-\\frac{D(y_{true},y_{pred})}{D_{null}}`,\n:math:`D_{null}` is the null deviance, i.e. 
the deviance of a model\nwith intercept alone, which corresponds to :math:`y_{pred} = \\bar{y}`.\nThe mean :math:`\\bar{y}` is averaged by sample_weight.\nBest possible score is 1.0 and it can be negative (because the model\ncan be arbitrarily worse).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Test samples.\n\ny : array-like of shape (n_samples,)\n True values of target.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n D^2 of self.predict(X) w.r.t. y." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Regression via a penalized Generalized Linear Model (GLM).\n\nGLMs based on a reproductive Exponential Dispersion Model (EDM) aim at\nfitting and predicting the mean of the target y as y_pred=h(X*w).\nTherefore, the fit minimizes the following objective function with L2\npriors as regularizer::\n\n 1/(2*sum(s)) * deviance(y, h(X*w); s)\n + 1/2 * alpha * |w|_2\n\nwith inverse link function h and s=sample_weight.\nThe parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23\n\nParameters\n----------\nalpha : float, default=1\n Constant that multiplies the penalty term and thus determines the\n regularization strength. ``alpha = 0`` is equivalent to unpenalized\n GLMs. In this case, the design matrix `X` must have full column rank\n (no collinearities).\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the linear predictor (X @ coef + intercept).\n\nfamily : {'normal', 'poisson', 'gamma', 'inverse-gaussian'} or an ExponentialDispersionModel instance, default='normal'\n The distributional assumption of the GLM, i.e. 
which distribution from\n the EDM, specifies the loss function to be minimized.\n\nlink : {'auto', 'identity', 'log'} or an instance of class BaseLink, default='auto'\n The link function of the GLM, i.e. mapping from linear predictor\n `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets\n the link depending on the chosen family as follows:\n\n - 'identity' for Normal distribution\n - 'log' for Poisson, Gamma and Inverse Gaussian distributions\n\nsolver : 'lbfgs', default='lbfgs'\n Algorithm to use in the optimization problem:\n\n 'lbfgs'\n Calls scipy's L-BFGS-B optimizer.\n\nmax_iter : int, default=100\n The maximal number of iterations for the solver.\n\ntol : float, default=1e-4\n Stopping criterion. For the lbfgs solver,\n the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n where ``g_j`` is the j-th component of the gradient (derivative) of\n the objective function.\n\nwarm_start : bool, default=False\n If set to ``True``, reuse the solution of the previous call to ``fit``\n as initialization for ``coef_`` and ``intercept_``.\n\nverbose : int, default=0\n For the lbfgs solver set verbose to any positive number for verbosity.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n Estimated coefficients for the linear predictor (`X @ coef_ +\n intercept_`) in the GLM.\n\nintercept_ : float\n Intercept (a.k.a. bias) added to linear predictor.\n\nn_iter_ : int\n Actual number of iterations used in the solver." - }, - { - "name": "PoissonRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the penalty term and thus determines the regularization strength. ``alpha = 0`` is equivalent to unpenalized GLMs. 
In this case, the design matrix `X` must have full column rank (no collinearities)." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X @ coef + intercept)." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximal number of iterations for the solver." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion. For the lbfgs solver, the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol`` where ``g_j`` is the j-th component of the gradient (derivative) of the objective function." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If set to ``True``, reuse the solution of the previous call to ``fit`` as initialization for ``coef_`` and ``intercept_`` ." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "For the lbfgs solver set verbose to any positive number for verbosity." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "family", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Generalized Linear Model with a Poisson distribution.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23\n\nParameters\n----------\nalpha : float, default=1\n Constant that multiplies the penalty term and thus determines the\n regularization strength. ``alpha = 0`` is equivalent to unpenalized\n GLMs. 
In this case, the design matrix `X` must have full column rank\n (no collinearities).\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the linear predictor (X @ coef + intercept).\n\nmax_iter : int, default=100\n The maximal number of iterations for the solver.\n\ntol : float, default=1e-4\n Stopping criterion. For the lbfgs solver,\n the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n where ``g_j`` is the j-th component of the gradient (derivative) of\n the objective function.\n\nwarm_start : bool, default=False\n If set to ``True``, reuse the solution of the previous call to ``fit``\n as initialization for ``coef_`` and ``intercept_`` .\n\nverbose : int, default=0\n For the lbfgs solver set verbose to any positive number for verbosity.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n Estimated coefficients for the linear predictor (`X @ coef_ +\n intercept_`) in the GLM.\n\nintercept_ : float\n Intercept (a.k.a. bias) added to linear predictor.\n\nn_iter_ : int\n Actual number of iterations used in the solver.\n\nExamples\n----------\n>>> from sklearn import linear_model\n>>> clf = linear_model.PoissonRegressor()\n>>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n>>> y = [12, 17, 22, 21]\n>>> clf.fit(X, y)\nPoissonRegressor()\n>>> clf.score(X, y)\n0.990...\n>>> clf.coef_\narray([0.121..., 0.158...])\n>>> clf.intercept_\n2.088...\n>>> clf.predict([[1, 1], [3, 4]])\narray([10.676..., 21.875...])" - }, - { - "name": "GammaRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the penalty term and thus determines the regularization strength. ``alpha = 0`` is equivalent to unpenalized GLMs. 
In this case, the design matrix `X` must have full column rank (no collinearities)." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X @ coef + intercept)." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximal number of iterations for the solver." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion. For the lbfgs solver, the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol`` where ``g_j`` is the j-th component of the gradient (derivative) of the objective function." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If set to ``True``, reuse the solution of the previous call to ``fit`` as initialization for ``coef_`` and ``intercept_`` ." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "For the lbfgs solver set verbose to any positive number for verbosity." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "family", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Generalized Linear Model with a Gamma distribution.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23\n\nParameters\n----------\nalpha : float, default=1\n Constant that multiplies the penalty term and thus determines the\n regularization strength. ``alpha = 0`` is equivalent to unpenalized\n GLMs. 
In this case, the design matrix `X` must have full column rank\n (no collinearities).\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the linear predictor (X @ coef + intercept).\n\nmax_iter : int, default=100\n The maximal number of iterations for the solver.\n\ntol : float, default=1e-4\n Stopping criterion. For the lbfgs solver,\n the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n where ``g_j`` is the j-th component of the gradient (derivative) of\n the objective function.\n\nwarm_start : bool, default=False\n If set to ``True``, reuse the solution of the previous call to ``fit``\n as initialization for ``coef_`` and ``intercept_`` .\n\nverbose : int, default=0\n For the lbfgs solver set verbose to any positive number for verbosity.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n Estimated coefficients for the linear predictor (`X * coef_ +\n intercept_`) in the GLM.\n\nintercept_ : float\n Intercept (a.k.a. 
bias) added to linear predictor.\n\nn_iter_ : int\n Actual number of iterations used in the solver.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.GammaRegressor()\n>>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n>>> y = [19, 26, 33, 30]\n>>> clf.fit(X, y)\nGammaRegressor()\n>>> clf.score(X, y)\n0.773...\n>>> clf.coef_\narray([0.072..., 0.066...])\n>>> clf.intercept_\n2.896...\n>>> clf.predict([[1, 0], [2, 8]])\narray([19.483..., 35.795...])" - }, - { - "name": "TweedieRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "power", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The power determines the underlying target distribution according to the following table: +-------+------------------------+ | Power | Distribution | +=======+========================+ | 0 | Normal | +-------+------------------------+ | 1 | Poisson | +-------+------------------------+ | (1,2) | Compound Poisson Gamma | +-------+------------------------+ | 2 | Gamma | +-------+------------------------+ | 3 | Inverse Gaussian | +-------+------------------------+ For ``0 < power < 1``, no distribution exists." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Constant that multiplies the penalty term and thus determines the regularization strength. ``alpha = 0`` is equivalent to unpenalized GLMs. In this case, the design matrix `X` must have full column rank (no collinearities)." - }, - { - "name": "link", - "type": "Literal['auto', 'identity', 'log']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The link function of the GLM, i.e. mapping from linear predictor `X @ coeff + intercept` to prediction `y_pred`. 
Option 'auto' sets the link depending on the chosen family as follows: - 'identity' for Normal distribution - 'log' for Poisson, Gamma and Inverse Gaussian distributions" - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X @ coef + intercept)." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The maximal number of iterations for the solver." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion. For the lbfgs solver, the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol`` where ``g_j`` is the j-th component of the gradient (derivative) of the objective function." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If set to ``True``, reuse the solution of the previous call to ``fit`` as initialization for ``coef_`` and ``intercept_`` ." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "For the lbfgs solver set verbose to any positive number for verbosity." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "family", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Generalized Linear Model with a Tweedie distribution.\n\nThis estimator can be used to model different GLMs depending on the\n``power`` parameter, which determines the underlying distribution.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.23\n\nParameters\n----------\npower : float, default=0\n The power determines the underlying target distribution according\n to the following table:\n\n +-------+------------------------+\n | Power | Distribution |\n +=======+========================+\n | 0 | Normal |\n +-------+------------------------+\n | 1 | Poisson |\n +-------+------------------------+\n | (1,2) | Compound Poisson Gamma |\n +-------+------------------------+\n | 2 | Gamma |\n +-------+------------------------+\n | 3 | Inverse Gaussian |\n +-------+------------------------+\n\n For ``0 < power < 1``, no distribution exists.\n\nalpha : float, default=1\n Constant that multiplies the penalty term and thus determines the\n regularization strength. ``alpha = 0`` is equivalent to unpenalized\n GLMs. In this case, the design matrix `X` must have full column rank\n (no collinearities).\n\nlink : {'auto', 'identity', 'log'}, default='auto'\n The link function of the GLM, i.e. mapping from linear predictor\n `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets\n the link depending on the chosen family as follows:\n\n - 'identity' for Normal distribution\n - 'log' for Poisson, Gamma and Inverse Gaussian distributions\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the linear predictor (X @ coef + intercept).\n\nmax_iter : int, default=100\n The maximal number of iterations for the solver.\n\ntol : float, default=1e-4\n Stopping criterion. 
For the lbfgs solver,\n the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n where ``g_j`` is the j-th component of the gradient (derivative) of\n the objective function.\n\nwarm_start : bool, default=False\n If set to ``True``, reuse the solution of the previous call to ``fit``\n as initialization for ``coef_`` and ``intercept_`` .\n\nverbose : int, default=0\n For the lbfgs solver set verbose to any positive number for verbosity.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n Estimated coefficients for the linear predictor (`X @ coef_ +\n intercept_`) in the GLM.\n\nintercept_ : float\n Intercept (a.k.a. bias) added to linear predictor.\n\nn_iter_ : int\n Actual number of iterations used in the solver.\n\nExamples\n----------\n>>> from sklearn import linear_model\n>>> clf = linear_model.TweedieRegressor()\n>>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n>>> y = [2, 3.5, 5, 5.5]\n>>> clf.fit(X, y)\nTweedieRegressor()\n>>> clf.score(X, y)\n0.839...\n>>> clf.coef_\narray([0.599..., 0.299...])\n>>> clf.intercept_\n1.600...\n>>> clf.predict([[1, 1], [3, 4]])\narray([2.500..., 4.599...])" - } - ], - "functions": [ - { - "name": "_safe_lin_pred", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the linear predictor taking care if intercept is present." - }, - { - "name": "_y_pred_deviance_derivative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute y_pred and the derivative of the deviance w.r.t coef." 
- } - ] - }, - { - "name": "sklearn.linear_model._glm.link", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numpy as np", - "from scipy.special import expit", - "from scipy.special import logit" - ], - "classes": [ - { - "name": "BaseLink", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "y_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Usually the (predicted) mean." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the link function g(y_pred).\n\nThe link function links the mean y_pred=E[Y] to the so called linear\npredictor (X*w), i.e. g(y_pred) = linear predictor.\n\nParameters\n----------\ny_pred : array of shape (n_samples,)\n Usually the (predicted) mean." - }, - { - "name": "derivative", - "decorators": [], - "parameters": [ - { - "name": "y_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Usually the (predicted) mean." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the derivative of the link g'(y_pred).\n\nParameters\n----------\ny_pred : array of shape (n_samples,)\n Usually the (predicted) mean." - }, - { - "name": "inverse", - "decorators": [], - "parameters": [ - { - "name": "lin_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Usually the (fitted) linear predictor." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the inverse link function h(lin_pred).\n\nGives the inverse relationship between linear predictor and the mean\ny_pred=E[Y], i.e. 
h(linear predictor) = y_pred.\n\nParameters\n----------\nlin_pred : array of shape (n_samples,)\n Usually the (fitted) linear predictor." - }, - { - "name": "inverse_derivative", - "decorators": [], - "parameters": [ - { - "name": "lin_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Usually the (fitted) linear predictor." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the derivative of the inverse link function h'(lin_pred).\n\nParameters\n----------\nlin_pred : array of shape (n_samples,)\n Usually the (fitted) linear predictor." - } - ], - "docstring": "Abstract base class for Link functions." - }, - { - "name": "IdentityLink", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "derivative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_derivative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "The identity link function g(x)=x." 
- }, - { - "name": "LogLink", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "derivative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_derivative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "The log link function g(x)=log(x)." - }, - { - "name": "LogitLink", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "derivative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_derivative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "The logit link function g(x)=logit(x)." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.linear_model._glm", - "imports": [ - "from glm import GeneralizedLinearRegressor", - "from glm import PoissonRegressor", - "from glm import GammaRegressor", - "from glm import TweedieRegressor" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.linear_model._glm.tests.test_glm", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_allclose", - "import pytest", - "import warnings", - "from sklearn.datasets import make_regression", - "from sklearn.linear_model._glm import GeneralizedLinearRegressor", - "from sklearn.linear_model import TweedieRegressor", - "from sklearn.linear_model import PoissonRegressor", - "from sklearn.linear_model import GammaRegressor", - "from sklearn.linear_model._glm.link import IdentityLink", - "from sklearn.linear_model._glm.link import LogLink", - "from sklearn._loss.glm_distribution import TweedieDistribution", - "from sklearn._loss.glm_distribution import NormalDistribution", - "from sklearn._loss.glm_distribution import PoissonDistribution", - "from sklearn._loss.glm_distribution import GammaDistribution", - "from sklearn._loss.glm_distribution import InverseGaussianDistribution", - "from sklearn.linear_model import Ridge", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.model_selection import train_test_split" - ], - "classes": [], - "functions": [ - { - "name": "regression_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weights_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test the raised errors in the validation of sample_weight." - }, - { - "name": "test_glm_family_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM family argument set as string." 
- }, - { - "name": "test_glm_link_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM link argument set as string." - }, - { - "name": "test_glm_link_auto", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_glm_alpha_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM for invalid alpha argument." - }, - { - "name": "test_glm_fit_intercept_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM for invalid fit_intercept argument." - }, - { - "name": "test_glm_solver_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM for invalid solver argument." - }, - { - "name": "test_glm_max_iter_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM for invalid max_iter argument." - }, - { - "name": "test_glm_tol_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM for invalid tol argument." - }, - { - "name": "test_glm_warm_start_argument", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM for invalid warm_start argument." - }, - { - "name": "test_glm_identity_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM regression with identity link on a simple dataset." 
- }, - { - "name": "test_glm_sample_weight_consistentcy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that the impact of sample_weight is consistent" - }, - { - "name": "test_glm_log_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test GLM regression with log link on a simple dataset." - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normal_ridge_comparison", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compare with Ridge regression for Normal distributions." - }, - { - "name": "test_poisson_glmnet", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compare Poisson regression with L2 regularization and LogLink to glmnet\n " - }, - { - "name": "test_convergence_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_poisson_regression_family", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gamma_regression_family", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tweedie_regression_family", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model._glm.tests.test_link", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_allclose", - "import pytest", - "from scipy.optimize import 
check_grad", - "from sklearn.linear_model._glm.link import IdentityLink", - "from sklearn.linear_model._glm.link import LogLink", - "from sklearn.linear_model._glm.link import LogitLink" - ], - "classes": [], - "functions": [ - { - "name": "test_link_properties", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test link inverse and derivative." - }, - { - "name": "test_link_derivative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.linear_model._glm.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.manifold.setup", - "imports": [ - "import os", - "import numpy", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.manifold._isomap", - "imports": [ - "import numpy as np", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from neighbors import NearestNeighbors", - "from neighbors import kneighbors_graph", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.graph import graph_shortest_path", - "from decomposition import KernelPCA", - "from preprocessing import KernelCenterer" - ], - "classes": [ - { - "name": "Isomap", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "number of neighbors to consider for each point." 
- }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "number of coordinates for the manifold" - }, - { - "name": "eigen_solver", - "type": "Literal['auto', 'arpack', 'dense']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "'auto' : Attempt to choose the most efficient solver for the given problem. 'arpack' : Use Arnoldi decomposition to find the eigenvalues and eigenvectors. 'dense' : Use a direct solver (i.e. LAPACK) for the eigenvalue decomposition." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Convergence tolerance passed to arpack or lobpcg. not used if eigen_solver == 'dense'." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for the arpack solver. not used if eigen_solver == 'dense'." - }, - { - "name": "path_method", - "type": "Literal['auto', 'FW', 'D']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Method to use in finding shortest path. 'auto' : attempt to choose the best algorithm automatically. 'FW' : Floyd-Warshall algorithm. 'D' : Dijkstra's algorithm." - }, - { - "name": "neighbors_algorithm", - "type": "Literal['auto', 'brute', 'kd_tree', 'ball_tree']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Algorithm to use for nearest neighbors search, passed to neighbors.NearestNeighbors instance." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. 
``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "metric", - "type": "str", - "hasDefault": true, - "default": "\"minkowski\"", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string or callable, it must be one of the options allowed by :func:`sklearn.metrics.pairwise_distances` for its metric parameter. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square. X may be a :term:`Glossary `. .. versionadded:: 0.22" - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Parameter for the Minkowski metric from sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used. .. versionadded:: 0.22" - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "reconstruction_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the reconstruction error for the embedding.\n\nReturns\n-------\nreconstruction_error : float\n\nNotes\n-----\nThe cost function of an isomap embedding is\n\n``E = frobenius_norm[K(D) - K(D_fit)] / n_samples``\n\nWhere D is the matrix of distances for the input data X,\nD_fit is the matrix of distances for the output embedding X_fit,\nand K is the isomap kernel:\n\n``K(D) = -0.5 * (I - 1/n_samples) * D^2 * (I - 1/n_samples)``" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample data, shape = (n_samples, n_features), in the form of a numpy array, sparse graph, precomputed tree, or NearestNeighbors object." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the embedding vectors for data X\n\nParameters\n----------\nX : {array-like, sparse graph, BallTree, KDTree, NearestNeighbors}\n Sample data, shape = (n_samples, n_features), in the form of a\n numpy array, sparse graph, precomputed tree, or NearestNeighbors\n object.\n\ny : Ignored\n\nReturns\n-------\nself : returns an instance of self." 
- }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model from data in X and transform X.\n\nParameters\n----------\nX : {array-like, sparse graph, BallTree, KDTree}\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If neighbors_algorithm='precomputed', X is assumed to be a distance matrix or a sparse graph of shape (n_queries, n_samples_fit)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X.\n\nThis is implemented by linking the points X into the graph of geodesic\ndistances of the training data. 
First the `n_neighbors` nearest\nneighbors of X are found in the training data, and from these the\nshortest geodesic distances from each point in X to each point in\nthe training data are computed in order to construct the kernel.\nThe embedding of X is the projection of this kernel onto the\nembedding vectors of the training set.\n\nParameters\n----------\nX : array-like, shape (n_queries, n_features)\n If neighbors_algorithm='precomputed', X is assumed to be a\n distance matrix or a sparse graph of shape\n (n_queries, n_samples_fit).\n\nReturns\n-------\nX_new : array-like, shape (n_queries, n_components)" - } - ], - "docstring": "Isomap Embedding\n\nNon-linear dimensionality reduction through Isometric Mapping\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_neighbors : int, default=5\n number of neighbors to consider for each point.\n\nn_components : int, default=2\n number of coordinates for the manifold\n\neigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n 'auto' : Attempt to choose the most efficient solver\n for the given problem.\n\n 'arpack' : Use Arnoldi decomposition to find the eigenvalues\n and eigenvectors.\n\n 'dense' : Use a direct solver (i.e. 
LAPACK)\n for the eigenvalue decomposition.\n\ntol : float, default=0\n Convergence tolerance passed to arpack or lobpcg.\n not used if eigen_solver == 'dense'.\n\nmax_iter : int, default=None\n Maximum number of iterations for the arpack solver.\n not used if eigen_solver == 'dense'.\n\npath_method : {'auto', 'FW', 'D'}, default='auto'\n Method to use in finding shortest path.\n\n 'auto' : attempt to choose the best algorithm automatically.\n\n 'FW' : Floyd-Warshall algorithm.\n\n 'D' : Dijkstra's algorithm.\n\nneighbors_algorithm : {'auto', 'brute', 'kd_tree', 'ball_tree'}, default='auto'\n Algorithm to use for nearest neighbors search,\n passed to neighbors.NearestNeighbors instance.\n\nn_jobs : int or None, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nmetric : string, or callable, default=\"minkowski\"\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n its metric parameter.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square. X may be a :term:`Glossary `.\n\n .. versionadded:: 0.22\n\np : int, default=2\n Parameter for the Minkowski metric from\n sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n .. versionadded:: 0.22\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n .. 
versionadded:: 0.22\n\nAttributes\n----------\nembedding_ : array-like, shape (n_samples, n_components)\n Stores the embedding vectors.\n\nkernel_pca_ : object\n :class:`~sklearn.decomposition.KernelPCA` object used to implement the\n embedding.\n\nnbrs_ : sklearn.neighbors.NearestNeighbors instance\n Stores nearest neighbors instance, including BallTree or KDtree\n if applicable.\n\ndist_matrix_ : array-like, shape (n_samples, n_samples)\n Stores the geodesic distance matrix of training data.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import Isomap\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = Isomap(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)\n\nReferences\n----------\n\n.. [1] Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. A global geometric\n framework for nonlinear dimensionality reduction. Science 290 (5500)" - } - ], - "functions": [] - }, - { - "name": "sklearn.manifold._locally_linear", - "imports": [ - "import numpy as np", - "from scipy.linalg import eigh", - "from scipy.linalg import svd", - "from scipy.linalg import qr", - "from scipy.linalg import solve", - "from scipy.sparse import eye", - "from scipy.sparse import csr_matrix", - "from scipy.sparse.linalg import eigsh", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from base import _UnstableArchMixin", - "from utils import check_random_state", - "from utils import check_array", - "from utils._arpack import _init_arpack_v0", - "from utils.extmath import stable_cumsum", - "from utils.validation import check_is_fitted", - "from utils.validation import FLOAT_DTYPES", - "from utils.validation import _deprecate_positional_args", - "from neighbors import NearestNeighbors" - ], - "classes": [ - { - "name": "LocallyLinearEmbedding", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": 
[], - "parameters": [ - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "number of neighbors to consider for each point." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "number of coordinates for the manifold" - }, - { - "name": "reg", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "regularization constant, multiplies the trace of the local covariance matrix of the distances." - }, - { - "name": "eigen_solver", - "type": "Literal['auto', 'arpack', 'dense']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "auto : algorithm will attempt to choose the best method for input data arpack : use arnoldi iteration in shift-invert mode. For this method, M may be a dense matrix, sparse matrix, or general linear operator. Warning: ARPACK can be unstable for some problems. It is best to try several random seeds in order to check results. dense : use standard dense matrix operations for the eigenvalue decomposition. For this method, M must be an array or matrix type. This method should be avoided for large problems." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for 'arpack' method Not used if eigen_solver=='dense'." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "maximum number of iterations for the arpack solver. Not used if eigen_solver=='dense'." 
- }, - { - "name": "method", - "type": "Literal['standard', 'hessian', 'modified', 'ltsa']", - "hasDefault": true, - "default": "'standard'", - "limitation": null, - "ignored": false, - "docstring": "standard : use the standard locally linear embedding algorithm. see reference [1] hessian : use the Hessian eigenmap method. This method requires ``n_neighbors > n_components * (1 + (n_components + 1) / 2`` see reference [2] modified : use the modified locally linear embedding algorithm. see reference [3] ltsa : use local tangent space alignment algorithm see reference [4]" - }, - { - "name": "hessian_tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for Hessian eigenmapping method. Only used if ``method == 'hessian'``" - }, - { - "name": "modified_tol", - "type": "float", - "hasDefault": true, - "default": "1e-12", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for modified LLE method. Only used if ``method == 'modified'``" - }, - { - "name": "neighbors_algorithm", - "type": "Literal['auto', 'brute', 'kd_tree', 'ball_tree']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "algorithm to use for nearest neighbors search, passed to neighbors.NearestNeighbors instance" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator when ``eigen_solver`` == 'arpack'. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. 
See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "training set." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the embedding vectors for data X\n\nParameters\n----------\nX : array-like of shape [n_samples, n_features]\n training set.\n\ny : Ignored\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "training set." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the embedding vectors for data X and transform X.\n\nParameters\n----------\nX : array-like of shape [n_samples, n_features]\n training set.\n\ny : Ignored\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform new points into embedding space.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nX_new : array, shape = [n_samples, n_components]\n\nNotes\n-----\nBecause of scaling performed by this method, it is discouraged to use\nit together with methods that are not scale-invariant (like SVMs)" - } - ], - "docstring": "Locally Linear Embedding\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_neighbors : int, default=5\n number of neighbors to consider for each point.\n\nn_components : int, default=2\n number of coordinates for the manifold\n\nreg : float, default=1e-3\n regularization constant, multiplies the trace of the local covariance\n matrix of the distances.\n\neigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n auto : algorithm will attempt to choose the best method for input data\n\n arpack : use arnoldi iteration in shift-invert mode.\n For this method, M may be a dense matrix, sparse matrix,\n or general linear operator.\n Warning: ARPACK can be unstable for some problems. It is\n best to try several random seeds in order to check results.\n\n dense : use standard dense matrix operations for the eigenvalue\n decomposition. 
For this method, M must be an array\n or matrix type. This method should be avoided for\n large problems.\n\ntol : float, default=1e-6\n Tolerance for 'arpack' method\n Not used if eigen_solver=='dense'.\n\nmax_iter : int, default=100\n maximum number of iterations for the arpack solver.\n Not used if eigen_solver=='dense'.\n\nmethod : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'\n standard : use the standard locally linear embedding algorithm. see\n reference [1]\n hessian : use the Hessian eigenmap method. This method requires\n ``n_neighbors > n_components * (1 + (n_components + 1) / 2``\n see reference [2]\n modified : use the modified locally linear embedding algorithm.\n see reference [3]\n ltsa : use local tangent space alignment algorithm\n see reference [4]\n\nhessian_tol : float, default=1e-4\n Tolerance for Hessian eigenmapping method.\n Only used if ``method == 'hessian'``\n\nmodified_tol : float, default=1e-12\n Tolerance for modified LLE method.\n Only used if ``method == 'modified'``\n\nneighbors_algorithm : {'auto', 'brute', 'kd_tree', 'ball_tree'}, default='auto'\n algorithm to use for nearest neighbors search,\n passed to neighbors.NearestNeighbors instance\n\nrandom_state : int, RandomState instance, default=None\n Determines the random number generator when\n ``eigen_solver`` == 'arpack'. Pass an int for reproducible results\n across multiple function calls. See :term: `Glossary `.\n\nn_jobs : int or None, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nembedding_ : array-like, shape [n_samples, n_components]\n Stores the embedding vectors\n\nreconstruction_error_ : float\n Reconstruction error associated with `embedding_`\n\nnbrs_ : NearestNeighbors object\n Stores nearest neighbors instance, including BallTree or KDtree\n if applicable.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import LocallyLinearEmbedding\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = LocallyLinearEmbedding(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)\n\nReferences\n----------\n\n.. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction\n by locally linear embedding. Science 290:2323 (2000).\n.. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally\n linear embedding techniques for high-dimensional data.\n Proc Natl Acad Sci U S A. 100:5591 (2003).\n.. [3] Zhang, Z. & Wang, J. MLLE: Modified Locally Linear\n Embedding Using Multiple Weights.\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382\n.. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear\n dimensionality reduction via tangent space alignment.\n Journal of Shanghai Univ. 
8:406 (2004)" - } - ], - "functions": [ - { - "name": "barycenter_weights", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "indices", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of the points in Y used to compute the barycenter" - }, - { - "name": "reg", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "amount of regularization to add for the problem to be well-posed in the case of n_neighbors > n_dim" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute barycenter weights of X from Y along the first axis\n\nWe estimate the weights to assign to each point in Y[indices] to recover\nthe point X[i]. The barycenter weights sum to 1.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_dim)\n\nY : array-like, shape (n_samples, n_dim)\n\nindices : array-like, shape (n_samples, n_dim)\n Indices of the points in Y used to compute the barycenter\n\nreg : float, default=1e-3\n amount of regularization to add for the problem to be\n well-posed in the case of n_neighbors > n_dim\n\nReturns\n-------\nB : array-like, shape (n_samples, n_neighbors)\n\nNotes\n-----\nSee developers note for more information." - }, - { - "name": "barycenter_kneighbors_graph", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample data, shape = (n_samples, n_features), in the form of a numpy array or a NearestNeighbors object." 
- }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors for each sample." - }, - { - "name": "reg", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Amount of regularization when solving the least-squares problem. Only relevant if mode='barycenter'. If None, use the default." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Computes the barycenter weighted graph of k-Neighbors for points in X\n\nParameters\n----------\nX : {array-like, NearestNeighbors}\n Sample data, shape = (n_samples, n_features), in the form of a\n numpy array or a NearestNeighbors object.\n\nn_neighbors : int\n Number of neighbors for each sample.\n\nreg : float, default=1e-3\n Amount of regularization when solving the least-squares\n problem. Only relevant if mode='barycenter'. If None, use the\n default.\n\nn_jobs : int or None, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nReturns\n-------\nA : sparse matrix in CSR format, shape = [n_samples, n_samples]\n A[i, j] is assigned the weight of edge that connects i to j.\n\nSee Also\n--------\nsklearn.neighbors.kneighbors_graph\nsklearn.neighbors.radius_neighbors_graph" - }, - { - "name": "null_space", - "decorators": [], - "parameters": [ - { - "name": "M", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input covariance matrix: should be symmetric positive semi-definite" - }, - { - "name": "k", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of eigenvalues/vectors to return" - }, - { - "name": "k_skip", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Number of low eigenvalues to skip." - }, - { - "name": "eigen_solver", - "type": "Literal['auto', 'arpack', 'dense']", - "hasDefault": true, - "default": "'arpack'", - "limitation": null, - "ignored": false, - "docstring": "auto : algorithm will attempt to choose the best method for input data arpack : use arnoldi iteration in shift-invert mode. For this method, M may be a dense matrix, sparse matrix, or general linear operator. Warning: ARPACK can be unstable for some problems. It is best to try several random seeds in order to check results. dense : use standard dense matrix operations for the eigenvalue decomposition. For this method, M must be an array or matrix type. This method should be avoided for large problems." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for 'arpack' method. Not used if eigen_solver=='dense'." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for 'arpack' method. Not used if eigen_solver=='dense'" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator when ``solver`` == 'arpack'. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the null space of a matrix M.\n\nParameters\n----------\nM : {array, matrix, sparse matrix, LinearOperator}\n Input covariance matrix: should be symmetric positive semi-definite\n\nk : int\n Number of eigenvalues/vectors to return\n\nk_skip : int, default=1\n Number of low eigenvalues to skip.\n\neigen_solver : {'auto', 'arpack', 'dense'}, default='arpack'\n auto : algorithm will attempt to choose the best method for input data\n arpack : use arnoldi iteration in shift-invert mode.\n For this method, M may be a dense matrix, sparse matrix,\n or general linear operator.\n Warning: ARPACK can be unstable for some problems. It is\n best to try several random seeds in order to check results.\n dense : use standard dense matrix operations for the eigenvalue\n decomposition. For this method, M must be an array\n or matrix type. This method should be avoided for\n large problems.\n\ntol : float, default=1e-6\n Tolerance for 'arpack' method.\n Not used if eigen_solver=='dense'.\n\nmax_iter : int, default=100\n Maximum number of iterations for 'arpack' method.\n Not used if eigen_solver=='dense'\n\nrandom_state : int, RandomState instance, default=None\n Determines the random number generator when ``solver`` == 'arpack'.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `." 
- }, - { - "name": "locally_linear_embedding", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample data, shape = (n_samples, n_features), in the form of a numpy array or a NearestNeighbors object." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "number of neighbors to consider for each point." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "number of coordinates for the manifold." - }, - { - "name": "reg", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "regularization constant, multiplies the trace of the local covariance matrix of the distances." - }, - { - "name": "eigen_solver", - "type": "Literal['auto', 'arpack', 'dense']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "auto : algorithm will attempt to choose the best method for input data arpack : use arnoldi iteration in shift-invert mode. For this method, M may be a dense matrix, sparse matrix, or general linear operator. Warning: ARPACK can be unstable for some problems. It is best to try several random seeds in order to check results. dense : use standard dense matrix operations for the eigenvalue decomposition. For this method, M must be an array or matrix type. This method should be avoided for large problems." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for 'arpack' method Not used if eigen_solver=='dense'." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "maximum number of iterations for the arpack solver." - }, - { - "name": "method", - "type": "Literal['standard', 'hessian', 'modified', 'ltsa']", - "hasDefault": true, - "default": "'standard'", - "limitation": null, - "ignored": false, - "docstring": "standard : use the standard locally linear embedding algorithm. see reference [1]_ hessian : use the Hessian eigenmap method. This method requires n_neighbors > n_components * (1 + (n_components + 1) / 2. see reference [2]_ modified : use the modified locally linear embedding algorithm. see reference [3]_ ltsa : use local tangent space alignment algorithm see reference [4]_" - }, - { - "name": "hessian_tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for Hessian eigenmapping method. Only used if method == 'hessian'" - }, - { - "name": "modified_tol", - "type": "float", - "hasDefault": true, - "default": "1e-12", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for modified LLE method. Only used if method == 'modified'" - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator when ``solver`` == 'arpack'. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform a Locally Linear Embedding analysis on the data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, NearestNeighbors}\n Sample data, shape = (n_samples, n_features), in the form of a\n numpy array or a NearestNeighbors object.\n\nn_neighbors : int\n number of neighbors to consider for each point.\n\nn_components : int\n number of coordinates for the manifold.\n\nreg : float, default=1e-3\n regularization constant, multiplies the trace of the local covariance\n matrix of the distances.\n\neigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n auto : algorithm will attempt to choose the best method for input data\n\n arpack : use arnoldi iteration in shift-invert mode.\n For this method, M may be a dense matrix, sparse matrix,\n or general linear operator.\n Warning: ARPACK can be unstable for some problems. It is\n best to try several random seeds in order to check results.\n\n dense : use standard dense matrix operations for the eigenvalue\n decomposition. For this method, M must be an array\n or matrix type. This method should be avoided for\n large problems.\n\ntol : float, default=1e-6\n Tolerance for 'arpack' method\n Not used if eigen_solver=='dense'.\n\nmax_iter : int, default=100\n maximum number of iterations for the arpack solver.\n\nmethod : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'\n standard : use the standard locally linear embedding algorithm.\n see reference [1]_\n hessian : use the Hessian eigenmap method. 
This method requires\n n_neighbors > n_components * (1 + (n_components + 1) / 2.\n see reference [2]_\n modified : use the modified locally linear embedding algorithm.\n see reference [3]_\n ltsa : use local tangent space alignment algorithm\n see reference [4]_\n\nhessian_tol : float, default=1e-4\n Tolerance for Hessian eigenmapping method.\n Only used if method == 'hessian'\n\nmodified_tol : float, default=1e-12\n Tolerance for modified LLE method.\n Only used if method == 'modified'\n\nrandom_state : int, RandomState instance, default=None\n Determines the random number generator when ``solver`` == 'arpack'.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nn_jobs : int or None, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nReturns\n-------\nY : array-like, shape [n_samples, n_components]\n Embedding vectors.\n\nsquared_error : float\n Reconstruction error for the embedding vectors. Equivalent to\n ``norm(Y - W Y, 'fro')**2``, where W are the reconstruction weights.\n\nReferences\n----------\n\n.. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction\n by locally linear embedding. Science 290:2323 (2000).\n.. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally\n linear embedding techniques for high-dimensional data.\n Proc Natl Acad Sci U S A. 100:5591 (2003).\n.. [3] Zhang, Z. & Wang, J. MLLE: Modified Locally Linear\n Embedding Using Multiple Weights.\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382\n.. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear\n dimensionality reduction via tangent space alignment.\n Journal of Shanghai Univ. 
8:406 (2004)" - } - ] - }, - { - "name": "sklearn.manifold._mds", - "imports": [ - "import numpy as np", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "import warnings", - "from base import BaseEstimator", - "from metrics import euclidean_distances", - "from utils import check_random_state", - "from utils import check_array", - "from utils import check_symmetric", - "from isotonic import IsotonicRegression", - "from utils.validation import _deprecate_positional_args", - "from utils.deprecation import deprecated", - "from utils.fixes import delayed" - ], - "classes": [ - { - "name": "MDS", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of dimensions in which to immerse the dissimilarities." - }, - { - "name": "metric", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``True``, perform metric MDS; otherwise, perform nonmetric MDS." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "4", - "limitation": null, - "ignored": false, - "docstring": "Number of times the SMACOF algorithm will be run with different initializations. The final results will be the best output of the runs, determined by the run with the smallest final stress." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations of the SMACOF algorithm for a single run." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Level of verbosity." 
- }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance with respect to stress at which to declare convergence." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. If multiple initializations are used (``n_init``), each run of the algorithm is computed in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator used to initialize the centers. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - }, - { - "name": "dissimilarity", - "type": "Literal['euclidean', 'precomputed']", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "Dissimilarity measure to use: - 'euclidean': Pairwise Euclidean distances between points in the dataset. - 'precomputed': Pre-computed dissimilarities are passed directly to ``fit`` and ``fit_transform``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. 
If ``dissimilarity=='precomputed'``, the input should be the dissimilarity matrix." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - }, - { - "name": "init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Starting configuration of the embedding to initialize the SMACOF algorithm. By default, the algorithm is initialized with a randomly chosen array." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the position of the points in the embedding space.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or (n_samples, n_samples)\n Input data. If ``dissimilarity=='precomputed'``, the input should\n be the dissimilarity matrix.\n\ny : Ignored\n\ninit : ndarray of shape (n_samples,), default=None\n Starting configuration of the embedding to initialize the SMACOF\n algorithm. By default, the algorithm is initialized with a randomly\n chosen array." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. If ``dissimilarity=='precomputed'``, the input should be the dissimilarity matrix." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - }, - { - "name": "init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Starting configuration of the embedding to initialize the SMACOF algorithm. By default, the algorithm is initialized with a randomly chosen array." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the data from X, and returns the embedded coordinates.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or (n_samples, n_samples)\n Input data. If ``dissimilarity=='precomputed'``, the input should\n be the dissimilarity matrix.\n\ny : Ignored\n\ninit : ndarray of shape (n_samples,), default=None\n Starting configuration of the embedding to initialize the SMACOF\n algorithm. By default, the algorithm is initialized with a randomly\n chosen array." - } - ], - "docstring": "Multidimensional scaling.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=2\n Number of dimensions in which to immerse the dissimilarities.\n\nmetric : bool, default=True\n If ``True``, perform metric MDS; otherwise, perform nonmetric MDS.\n\nn_init : int, default=4\n Number of times the SMACOF algorithm will be run with different\n initializations. The final results will be the best output of the runs,\n determined by the run with the smallest final stress.\n\nmax_iter : int, default=300\n Maximum number of iterations of the SMACOF algorithm for a single run.\n\nverbose : int, default=0\n Level of verbosity.\n\neps : float, default=1e-3\n Relative tolerance with respect to stress at which to declare\n convergence.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. If multiple\n initializations are used (``n_init``), each run of the algorithm is\n computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\ndissimilarity : {'euclidean', 'precomputed'}, default='euclidean'\n Dissimilarity measure to use:\n\n - 'euclidean':\n Pairwise Euclidean distances between points in the dataset.\n\n - 'precomputed':\n Pre-computed dissimilarities are passed directly to ``fit`` and\n ``fit_transform``.\n\nAttributes\n----------\nembedding_ : ndarray of shape (n_samples, n_components)\n Stores the position of the dataset in the embedding space.\n\nstress_ : float\n The final value of the stress (sum of squared distance of the\n disparities and the distances for all constrained points).\n\ndissimilarity_matrix_ : ndarray of shape (n_samples, n_samples)\n Pairwise dissimilarities between the points. Symmetric matrix that:\n\n - either uses a custom dissimilarity matrix by setting `dissimilarity`\n to 'precomputed';\n - or constructs a dissimilarity matrix from data using\n Euclidean distances.\n\nn_iter_ : int\n The number of iterations corresponding to the best stress.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import MDS\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = MDS(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)\n\nReferences\n----------\n\"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\nGroenen P. Springer Series in Statistics (1997)\n\n\"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\nPsychometrika, 29 (1964)\n\n\"Multidimensional scaling by optimizing goodness of fit to a nonmetric\nhypothesis\" Kruskal, J. 
Psychometrika, 29, (1964)" - } - ], - "functions": [ - { - "name": "_smacof_single", - "decorators": [], - "parameters": [ - { - "name": "dissimilarities", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pairwise dissimilarities between the points. Must be symmetric." - }, - { - "name": "metric", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Compute metric or nonmetric SMACOF algorithm." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of dimensions in which to immerse the dissimilarities. If an ``init`` array is provided, this option is overridden and the shape of ``init`` is used to determine the dimensionality of the embedding space." - }, - { - "name": "init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Starting configuration of the embedding to initialize the algorithm. By default, the algorithm is initialized with a randomly chosen array." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations of the SMACOF algorithm for a single run." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Level of verbosity." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance with respect to stress at which to declare convergence." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator used to initialize the centers. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes multidimensional scaling using SMACOF algorithm.\n\nParameters\n----------\ndissimilarities : ndarray of shape (n_samples, n_samples)\n Pairwise dissimilarities between the points. Must be symmetric.\n\nmetric : bool, default=True\n Compute metric or nonmetric SMACOF algorithm.\n\nn_components : int, default=2\n Number of dimensions in which to immerse the dissimilarities. If an\n ``init`` array is provided, this option is overridden and the shape of\n ``init`` is used to determine the dimensionality of the embedding\n space.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n Starting configuration of the embedding to initialize the algorithm. By\n default, the algorithm is initialized with a randomly chosen array.\n\nmax_iter : int, default=300\n Maximum number of iterations of the SMACOF algorithm for a single run.\n\nverbose : int, default=0\n Level of verbosity.\n\neps : float, default=1e-3\n Relative tolerance with respect to stress at which to declare\n convergence.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_components)\n Coordinates of the points in a ``n_components``-space.\n\nstress : float\n The final value of the stress (sum of squared distance of the\n disparities and the distances for all constrained points).\n\nn_iter : int\n The number of iterations corresponding to the best stress." 
- }, - { - "name": "smacof", - "decorators": [], - "parameters": [ - { - "name": "dissimilarities", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pairwise dissimilarities between the points. Must be symmetric." - }, - { - "name": "metric", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Compute metric or nonmetric SMACOF algorithm." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of dimensions in which to immerse the dissimilarities. If an ``init`` array is provided, this option is overridden and the shape of ``init`` is used to determine the dimensionality of the embedding space." - }, - { - "name": "init", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Starting configuration of the embedding to initialize the algorithm. By default, the algorithm is initialized with a randomly chosen array." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "8", - "limitation": null, - "ignored": false, - "docstring": "Number of times the SMACOF algorithm will be run with different initializations. The final results will be the best output of the runs, determined by the run with the smallest final stress. If ``init`` is provided, this option is overridden and a single run is performed." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. If multiple initializations are used (``n_init``), each run of the algorithm is computed in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations of the SMACOF algorithm for a single run." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Level of verbosity." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance with respect to stress at which to declare convergence." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator used to initialize the centers. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - }, - { - "name": "return_n_iter", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the number of iterations." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes multidimensional scaling using the SMACOF algorithm.\n\nThe SMACOF (Scaling by MAjorizing a COmplicated Function) algorithm is a\nmultidimensional scaling algorithm which minimizes an objective function\n(the *stress*) using a majorization technique. Stress majorization, also\nknown as the Guttman Transform, guarantees a monotone convergence of\nstress, and is more powerful than traditional techniques such as gradient\ndescent.\n\nThe SMACOF algorithm for metric MDS can summarized by the following steps:\n\n1. Set an initial start configuration, randomly or not.\n2. Compute the stress\n3. Compute the Guttman Transform\n4. 
Iterate 2 and 3 until convergence.\n\nThe nonmetric algorithm adds a monotonic regression step before computing\nthe stress.\n\nParameters\n----------\ndissimilarities : ndarray of shape (n_samples, n_samples)\n Pairwise dissimilarities between the points. Must be symmetric.\n\nmetric : bool, default=True\n Compute metric or nonmetric SMACOF algorithm.\n\nn_components : int, default=2\n Number of dimensions in which to immerse the dissimilarities. If an\n ``init`` array is provided, this option is overridden and the shape of\n ``init`` is used to determine the dimensionality of the embedding\n space.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n Starting configuration of the embedding to initialize the algorithm. By\n default, the algorithm is initialized with a randomly chosen array.\n\nn_init : int, default=8\n Number of times the SMACOF algorithm will be run with different\n initializations. The final results will be the best output of the runs,\n determined by the run with the smallest final stress. If ``init`` is\n provided, this option is overridden and a single run is performed.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. If multiple\n initializations are used (``n_init``), each run of the algorithm is\n computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nmax_iter : int, default=300\n Maximum number of iterations of the SMACOF algorithm for a single run.\n\nverbose : int, default=0\n Level of verbosity.\n\neps : float, default=1e-3\n Relative tolerance with respect to stress at which to declare\n convergence.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_components)\n Coordinates of the points in a ``n_components``-space.\n\nstress : float\n The final value of the stress (sum of squared distance of the\n disparities and the distances for all constrained points).\n\nn_iter : int\n The number of iterations corresponding to the best stress. Returned\n only if ``return_n_iter`` is set to ``True``.\n\nNotes\n-----\n\"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\nGroenen P. Springer Series in Statistics (1997)\n\n\"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\nPsychometrika, 29 (1964)\n\n\"Multidimensional scaling by optimizing goodness of fit to a nonmetric\nhypothesis\" Kruskal, J. 
Psychometrika, 29, (1964)" - } - ] - }, - { - "name": "sklearn.manifold._spectral_embedding", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy import sparse", - "from scipy.linalg import eigh", - "from scipy.sparse.linalg import eigsh", - "from scipy.sparse.csgraph import connected_components", - "from scipy.sparse.csgraph import laplacian as csgraph_laplacian", - "from base import BaseEstimator", - "from utils import check_array", - "from utils import check_random_state", - "from utils import check_symmetric", - "from utils._arpack import _init_arpack_v0", - "from utils.extmath import _deterministic_vector_sign_flip", - "from utils.fixes import lobpcg", - "from metrics.pairwise import rbf_kernel", - "from neighbors import kneighbors_graph", - "from neighbors import NearestNeighbors", - "from utils.validation import _deprecate_positional_args", - "from utils.deprecation import deprecated", - "from pyamg import smoothed_aggregation_solver" - ], - "classes": [ - { - "name": "SpectralEmbedding", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The dimension of the projected subspace." - }, - { - "name": "affinity", - "type": "Literal['nearest_neighbors', 'rbf', 'precomputed', 'precomputed_nearest_neighbors']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "How to construct the affinity matrix. - 'nearest_neighbors' : construct the affinity matrix by computing a graph of nearest neighbors. - 'rbf' : construct the affinity matrix by computing a radial basis function (RBF) kernel. - 'precomputed' : interpret ``X`` as a precomputed affinity matrix. 
- 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph of precomputed nearest neighbors, and constructs the affinity matrix by selecting the ``n_neighbors`` nearest neighbors. - callable : use passed in function as affinity the function takes in data matrix (n_samples, n_features) and return affinity matrix (n_samples, n_samples)." - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Kernel coefficient for rbf kernel. If None, gamma will be set to 1/n_features." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator used for the initialization of the lobpcg eigenvectors when ``solver`` == 'amg'. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - }, - { - "name": "eigen_solver", - "type": "Literal['arpack', 'lobpcg', 'amg']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The eigenvalue decomposition strategy to use. AMG requires pyamg to be installed. It can be faster on very large, sparse problems. If None, then ``'arpack'`` is used." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of nearest neighbors for nearest_neighbors graph building. If None, n_neighbors will be set to max(n_samples/10, 1)." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_affinity_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Calculate the affinity matrix from data\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n If affinity is \"precomputed\"\n X : array-like of shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\nY: Ignored\n\nReturns\n-------\naffinity_matrix of shape (n_samples, n_samples)" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features. If affinity is \"precomputed\" X : {array-like, sparse matrix}, shape (n_samples, n_samples), Interpret X as precomputed adjacency graph computed from samples." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n If affinity is \"precomputed\"\n X : {array-like, sparse matrix}, shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features. If affinity is \"precomputed\" X : {array-like, sparse matrix} of shape (n_samples, n_samples), Interpret X as precomputed adjacency graph computed from samples." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the model from data in X and transform X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n If affinity is \"precomputed\"\n X : {array-like, sparse matrix} of shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\ny : Ignored\n\nReturns\n-------\nX_new : array-like of shape (n_samples, n_components)" - } - ], - "docstring": "Spectral embedding for non-linear dimensionality reduction.\n\nForms an affinity matrix given by the specified function and\napplies spectral decomposition to the corresponding graph laplacian.\nThe resulting transformation is given by the value of the\neigenvectors for each data point.\n\nNote : Laplacian Eigenmaps is the actual algorithm implemented here.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=2\n The dimension of the projected subspace.\n\naffinity : {'nearest_neighbors', 'rbf', 'precomputed', 'precomputed_nearest_neighbors'} or callable, default='nearest_neighbors'\n How to construct the affinity matrix.\n - 'nearest_neighbors' : construct the affinity matrix by computing a\n graph of nearest neighbors.\n - 'rbf' : construct the affinity matrix by computing a radial basis\n function (RBF) kernel.\n - 'precomputed' : interpret ``X`` as a precomputed affinity matrix.\n - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph\n of precomputed nearest neighbors, and constructs the affinity matrix\n by selecting the ``n_neighbors`` nearest neighbors.\n - callable : use passed in function as affinity\n the function takes in data matrix (n_samples, n_features)\n and 
return affinity matrix (n_samples, n_samples).\n\ngamma : float, default=None\n Kernel coefficient for rbf kernel. If None, gamma will be set to\n 1/n_features.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator used for the initialization of\n the lobpcg eigenvectors when ``solver`` == 'amg'. Pass an int for\n reproducible results across multiple function calls.\n See :term: `Glossary `.\n\neigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems.\n If None, then ``'arpack'`` is used.\n\nn_neighbors : int, default=None\n Number of nearest neighbors for nearest_neighbors graph building.\n If None, n_neighbors will be set to max(n_samples/10, 1).\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nembedding_ : ndarray of shape (n_samples, n_components)\n Spectral embedding of the training matrix.\n\naffinity_matrix_ : ndarray of shape (n_samples, n_samples)\n Affinity_matrix constructed from samples or precomputed.\n\nn_neighbors_ : int\n Number of nearest neighbors effectively used.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import SpectralEmbedding\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = SpectralEmbedding(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)\n\nReferences\n----------\n\n- A Tutorial on Spectral Clustering, 2007\n Ulrike von Luxburg\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323\n\n- On Spectral Clustering: Analysis and an algorithm, 2001\n Andrew Y. Ng, Michael I. 
Jordan, Yair Weiss\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.19.8100\n\n- Normalized cuts and image segmentation, 2000\n Jianbo Shi, Jitendra Malik\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324" - } - ], - "functions": [ - { - "name": "_graph_connected_component", - "decorators": [], - "parameters": [ - { - "name": "graph", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Adjacency matrix of the graph, non-zero weight means an edge between the nodes." - }, - { - "name": "node_id", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index of the query node of the graph." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the largest graph connected components that contains one\ngiven node.\n\nParameters\n----------\ngraph : array-like of shape (n_samples, n_samples)\n Adjacency matrix of the graph, non-zero weight means an edge\n between the nodes.\n\nnode_id : int\n The index of the query node of the graph.\n\nReturns\n-------\nconnected_components_matrix : array-like of shape (n_samples,)\n An array of bool value indicating the indexes of the nodes\n belonging to the largest connected components of the given query\n node." - }, - { - "name": "_graph_is_connected", - "decorators": [], - "parameters": [ - { - "name": "graph", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Adjacency matrix of the graph, non-zero weight means an edge between the nodes." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return whether the graph is connected (True) or Not (False).\n\nParameters\n----------\ngraph : {array-like, sparse matrix} of shape (n_samples, n_samples)\n Adjacency matrix of the graph, non-zero weight means an edge\n between the nodes.\n\nReturns\n-------\nis_connected : bool\n True means the graph is fully connected and False means not." - }, - { - "name": "_set_diag", - "decorators": [], - "parameters": [ - { - "name": "laplacian", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The graph laplacian." - }, - { - "name": "value", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The value of the diagonal." - }, - { - "name": "norm_laplacian", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether the value of the diagonal should be changed or not." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set the diagonal of the laplacian matrix and convert it to a\nsparse format well suited for eigenvalue decomposition.\n\nParameters\n----------\nlaplacian : {ndarray, sparse matrix}\n The graph laplacian.\n\nvalue : float\n The value of the diagonal.\n\nnorm_laplacian : bool\n Whether the value of the diagonal should be changed or not.\n\nReturns\n-------\nlaplacian : {array, sparse matrix}\n An array of matrix in a form that is well suited to fast\n eigenvalue decomposition, depending on the band width of the\n matrix." - }, - { - "name": "spectral_embedding", - "decorators": [], - "parameters": [ - { - "name": "adjacency", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The adjacency matrix of the graph to embed." 
- }, - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "8", - "limitation": null, - "ignored": false, - "docstring": "The dimension of the projection subspace." - }, - { - "name": "eigen_solver", - "type": "Literal['arpack', 'lobpcg', 'amg']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The eigenvalue decomposition strategy to use. AMG requires pyamg to be installed. It can be faster on very large, sparse problems, but may also lead to instabilities. If None, then ``'arpack'`` is used." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator used for the initialization of the lobpcg eigenvectors decomposition when ``solver`` == 'amg'. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - }, - { - "name": "eigen_tol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion for eigendecomposition of the Laplacian matrix when using arpack eigen_solver." - }, - { - "name": "norm_laplacian", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, then compute normalized Laplacian." - }, - { - "name": "drop_first", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to drop the first eigenvector. For spectral embedding, this should be True as the first eigenvector should be constant vector for connected graph, but for spectral clustering, this should be kept as False to retain the first eigenvector." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Project the sample on the first eigenvectors of the graph Laplacian.\n\nThe adjacency matrix is used to compute a normalized graph Laplacian\nwhose spectrum (especially the eigenvectors associated to the\nsmallest eigenvalues) has an interpretation in terms of minimal\nnumber of cuts necessary to split the graph into comparably sized\ncomponents.\n\nThis embedding can also 'work' even if the ``adjacency`` variable is\nnot strictly the adjacency matrix of a graph but more generally\nan affinity or similarity matrix between samples (for instance the\nheat kernel of a euclidean distance matrix or a k-NN matrix).\n\nHowever care must taken to always make the affinity matrix symmetric\nso that the eigenvector decomposition works as expected.\n\nNote : Laplacian Eigenmaps is the actual algorithm implemented here.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nadjacency : {array-like, sparse graph} of shape (n_samples, n_samples)\n The adjacency matrix of the graph to embed.\n\nn_components : int, default=8\n The dimension of the projection subspace.\n\neigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems,\n but may also lead to instabilities. If None, then ``'arpack'`` is\n used.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator used for the initialization of\n the lobpcg eigenvectors decomposition when ``solver`` == 'amg'. 
Pass\n an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\neigen_tol : float, default=0.0\n Stopping criterion for eigendecomposition of the Laplacian matrix\n when using arpack eigen_solver.\n\nnorm_laplacian : bool, default=True\n If True, then compute normalized Laplacian.\n\ndrop_first : bool, default=True\n Whether to drop the first eigenvector. For spectral embedding, this\n should be True as the first eigenvector should be constant vector for\n connected graph, but for spectral clustering, this should be kept as\n False to retain the first eigenvector.\n\nReturns\n-------\nembedding : ndarray of shape (n_samples, n_components)\n The reduced samples.\n\nNotes\n-----\nSpectral Embedding (Laplacian Eigenmaps) is most useful when the graph\nhas one connected component. If there graph has many components, the first\nfew eigenvectors will simply uncover the connected components of the graph.\n\nReferences\n----------\n* https://en.wikipedia.org/wiki/LOBPCG\n\n* Toward the Optimal Preconditioned Eigensolver: Locally Optimal\n Block Preconditioned Conjugate Gradient Method\n Andrew V. 
Knyazev\n https://doi.org/10.1137%2FS1064827500366124" - } - ] - }, - { - "name": "sklearn.manifold._t_sne", - "imports": [ - "import warnings", - "from time import time", - "import numpy as np", - "from scipy import linalg", - "from scipy.spatial.distance import pdist", - "from scipy.spatial.distance import squareform", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import issparse", - "from neighbors import NearestNeighbors", - "from base import BaseEstimator", - "from utils import check_random_state", - "from utils._openmp_helpers import _openmp_effective_n_threads", - "from utils.validation import check_non_negative", - "from utils.validation import _deprecate_positional_args", - "from decomposition import PCA", - "from metrics.pairwise import pairwise_distances", - "from None import _utils", - "from None import _barnes_hut_tsne" - ], - "classes": [ - { - "name": "TSNE", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Dimension of the embedded space." - }, - { - "name": "perplexity", - "type": "float", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "The perplexity is related to the number of nearest neighbors that is used in other manifold learning algorithms. Larger datasets usually require a larger perplexity. Consider selecting a value between 5 and 50. Different values can result in significantly different results." - }, - { - "name": "early_exaggeration", - "type": "float", - "hasDefault": true, - "default": "12", - "limitation": null, - "ignored": false, - "docstring": "Controls how tight natural clusters in the original space are in the embedded space and how much space will be between them. 
For larger values, the space between natural clusters will be larger in the embedded space. Again, the choice of this parameter is not very critical. If the cost function increases during initial optimization, the early exaggeration factor or the learning rate might be too high." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If the learning rate is too high, the data may look like a 'ball' with any point approximately equidistant from its nearest neighbours. If the learning rate is too low, most points may look compressed in a dense cloud with few outliers. If the cost function gets stuck in a bad local minimum increasing the learning rate may help." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations for the optimization. Should be at least 250." - }, - { - "name": "n_iter_without_progress", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations without progress before we abort the optimization, used after 250 initial iterations with early exaggeration. Note that progress is only checked every 50 iterations so this value is rounded to the next multiple of 50. .. versionadded:: 0.17 parameter *n_iter_without_progress* to control stopping criteria." - }, - { - "name": "min_grad_norm", - "type": "float", - "hasDefault": true, - "default": "1e-7", - "limitation": null, - "ignored": false, - "docstring": "If the gradient norm is below this threshold, the optimization will be stopped." 
- }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options allowed by scipy.spatial.distance.pdist for its metric parameter, or a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS. If metric is \"precomputed\", X is assumed to be a distance matrix. Alternatively, if metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays from X as input and return a value indicating the distance between them. The default is \"euclidean\" which is interpreted as squared euclidean distance." - }, - { - "name": "init", - "type": "Literal['random', 'pca']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initialization of embedding. Possible options are 'random', 'pca', and a numpy array of shape (n_samples, n_components). PCA initialization cannot be used with precomputed distances and is usually more globally stable than random initialization." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the random number generator. Pass an int for reproducible results across multiple function calls. Note that different initializations might result in different local minima of the cost function. See :term: `Glossary `." 
- }, - { - "name": "method", - "type": "str", - "hasDefault": true, - "default": "'barnes_hut'", - "limitation": null, - "ignored": false, - "docstring": "By default the gradient calculation algorithm uses Barnes-Hut approximation running in O(NlogN) time. method='exact' will run on the slower, but exact, algorithm in O(N^2) time. The exact algorithm should be used when nearest-neighbor errors need to be better than 3%. However, the exact method cannot scale to millions of examples. .. versionadded:: 0.17 Approximate optimization *method* via the Barnes-Hut." - }, - { - "name": "angle", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Only used if method='barnes_hut' This is the trade-off between speed and accuracy for Barnes-Hut T-SNE. 'angle' is the angular size (referred to as theta in [3]) of a distant node as measured from a point. If this size is below 'angle' then it is used as a summary node of all points contained within it. This method is not very sensitive to changes in this parameter in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing computation time and angle greater 0.8 has quickly increasing error." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. This parameter has no impact when ``metric=\"precomputed\"`` or (``metric=\"euclidean\"`` and ``method=\"exact\"``). ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionadded:: 0.22" - }, - { - "name": "square_distances", - "type": "Literal[True, 'legacy']", - "hasDefault": true, - "default": "'legacy'", - "limitation": null, - "ignored": false, - "docstring": "Whether TSNE should square the distance values. 
``'legacy'`` means that distance values are squared only when ``metric=\"euclidean\"``. ``True`` means that distance values are squared for all metrics. .. versionadded:: 0.24 Added to provide backward compatibility during deprecation of legacy squaring behavior. .. deprecated:: 0.24 Legacy squaring behavior was deprecated in 0.24. The ``'legacy'`` value will be removed in 1.1 (renaming of 0.26), at which point the default value will change to ``True``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function to fit the model using X as training data." - }, - { - "name": "_tsne", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Runs t-SNE." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If the metric is 'precomputed' X must be a square distance matrix. Otherwise it contains a sample per row. If the method is 'exact', X may be a sparse matrix of type 'csr', 'csc' or 'coo'. If the method is 'barnes_hut' and the metric is 'precomputed', X may be a precomputed sparse graph." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit X into an embedded space and return that transformed\noutput.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n If the metric is 'precomputed' X must be a square distance\n matrix. Otherwise it contains a sample per row. If the method\n is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n or 'coo'. 
If the method is 'barnes_hut' and the metric is\n 'precomputed', X may be a precomputed sparse graph.\n\ny : Ignored\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Embedding of the training data in low-dimensional space." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If the metric is 'precomputed' X must be a square distance matrix. Otherwise it contains a sample per row. If the method is 'exact', X may be a sparse matrix of type 'csr', 'csc' or 'coo'. If the method is 'barnes_hut' and the metric is 'precomputed', X may be a precomputed sparse graph." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit X into an embedded space.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n If the metric is 'precomputed' X must be a square distance\n matrix. Otherwise it contains a sample per row. If the method\n is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n or 'coo'. If the method is 'barnes_hut' and the metric is\n 'precomputed', X may be a precomputed sparse graph.\n\ny : Ignored" - } - ], - "docstring": "t-distributed Stochastic Neighbor Embedding.\n\nt-SNE [1] is a tool to visualize high-dimensional data. It converts\nsimilarities between data points to joint probabilities and tries\nto minimize the Kullback-Leibler divergence between the joint\nprobabilities of the low-dimensional embedding and the\nhigh-dimensional data. t-SNE has a cost function that is not convex,\ni.e. with different initializations we can get different results.\n\nIt is highly recommended to use another dimensionality reduction\nmethod (e.g. 
PCA for dense data or TruncatedSVD for sparse data)\nto reduce the number of dimensions to a reasonable amount (e.g. 50)\nif the number of features is very high. This will suppress some\nnoise and speed up the computation of pairwise distances between\nsamples. For more tips see Laurens van der Maaten's FAQ [2].\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=2\n Dimension of the embedded space.\n\nperplexity : float, default=30.0\n The perplexity is related to the number of nearest neighbors that\n is used in other manifold learning algorithms. Larger datasets\n usually require a larger perplexity. Consider selecting a value\n between 5 and 50. Different values can result in significantly\n different results.\n\nearly_exaggeration : float, default=12.0\n Controls how tight natural clusters in the original space are in\n the embedded space and how much space will be between them. For\n larger values, the space between natural clusters will be larger\n in the embedded space. Again, the choice of this parameter is not\n very critical. If the cost function increases during initial\n optimization, the early exaggeration factor or the learning rate\n might be too high.\n\nlearning_rate : float, default=200.0\n The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\n the learning rate is too high, the data may look like a 'ball' with any\n point approximately equidistant from its nearest neighbours. If the\n learning rate is too low, most points may look compressed in a dense\n cloud with few outliers. If the cost function gets stuck in a bad local\n minimum increasing the learning rate may help.\n\nn_iter : int, default=1000\n Maximum number of iterations for the optimization. Should be at\n least 250.\n\nn_iter_without_progress : int, default=300\n Maximum number of iterations without progress before we abort the\n optimization, used after 250 initial iterations with early\n exaggeration. 
Note that progress is only checked every 50 iterations so\n this value is rounded to the next multiple of 50.\n\n .. versionadded:: 0.17\n parameter *n_iter_without_progress* to control stopping criteria.\n\nmin_grad_norm : float, default=1e-7\n If the gradient norm is below this threshold, the optimization will\n be stopped.\n\nmetric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by scipy.spatial.distance.pdist for its metric parameter, or\n a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a distance matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays from X as input and return a value indicating\n the distance between them. The default is \"euclidean\" which is\n interpreted as squared euclidean distance.\n\ninit : {'random', 'pca'} or ndarray of shape (n_samples, n_components), default='random'\n Initialization of embedding. Possible options are 'random', 'pca',\n and a numpy array of shape (n_samples, n_components).\n PCA initialization cannot be used with precomputed distances and is\n usually more globally stable than random initialization.\n\nverbose : int, default=0\n Verbosity level.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator. Pass an int for reproducible\n results across multiple function calls. Note that different\n initializations might result in different local minima of the cost\n function. See :term: `Glossary `.\n\nmethod : str, default='barnes_hut'\n By default the gradient calculation algorithm uses Barnes-Hut\n approximation running in O(NlogN) time. method='exact'\n will run on the slower, but exact, algorithm in O(N^2) time. 
The\n exact algorithm should be used when nearest-neighbor errors need\n to be better than 3%. However, the exact method cannot scale to\n millions of examples.\n\n .. versionadded:: 0.17\n Approximate optimization *method* via the Barnes-Hut.\n\nangle : float, default=0.5\n Only used if method='barnes_hut'\n This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.\n 'angle' is the angular size (referred to as theta in [3]) of a distant\n node as measured from a point. If this size is below 'angle' then it is\n used as a summary node of all points contained within it.\n This method is not very sensitive to changes in this parameter\n in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing\n computation time and angle greater 0.8 has quickly increasing error.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search. This parameter\n has no impact when ``metric=\"precomputed\"`` or\n (``metric=\"euclidean\"`` and ``method=\"exact\"``).\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.22\n\nsquare_distances : True or 'legacy', default='legacy'\n Whether TSNE should square the distance values. ``'legacy'`` means\n that distance values are squared only when ``metric=\"euclidean\"``.\n ``True`` means that distance values are squared for all metrics.\n\n .. versionadded:: 0.24\n Added to provide backward compatibility during deprecation of\n legacy squaring behavior.\n .. deprecated:: 0.24\n Legacy squaring behavior was deprecated in 0.24. 
The ``'legacy'``\n value will be removed in 1.1 (renaming of 0.26), at which point the\n default value will change to ``True``.\n\nAttributes\n----------\nembedding_ : array-like of shape (n_samples, n_components)\n Stores the embedding vectors.\n\nkl_divergence_ : float\n Kullback-Leibler divergence after optimization.\n\nn_iter_ : int\n Number of iterations run.\n\nExamples\n--------\n\n>>> import numpy as np\n>>> from sklearn.manifold import TSNE\n>>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])\n>>> X_embedded = TSNE(n_components=2).fit_transform(X)\n>>> X_embedded.shape\n(4, 2)\n\nReferences\n----------\n\n[1] van der Maaten, L.J.P.; Hinton, G.E. Visualizing High-Dimensional Data\n Using t-SNE. Journal of Machine Learning Research 9:2579-2605, 2008.\n\n[2] van der Maaten, L.J.P. t-Distributed Stochastic Neighbor Embedding\n https://lvdmaaten.github.io/tsne/\n\n[3] L.J.P. van der Maaten. Accelerating t-SNE using Tree-Based Algorithms.\n Journal of Machine Learning Research 15(Oct):3221-3245, 2014.\n https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf" - } - ], - "functions": [ - { - "name": "_joint_probabilities", - "decorators": [], - "parameters": [ - { - "name": "distances", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Distances of samples are stored as condensed matrices, i.e. we omit the diagonal and duplicate entries and store everything in a one-dimensional array." - }, - { - "name": "desired_perplexity", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Desired perplexity of the joint probability distributions." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute joint probabilities p_ij from distances.\n\nParameters\n----------\ndistances : ndarray of shape (n_samples * (n_samples-1) / 2,)\n Distances of samples are stored as condensed matrices, i.e.\n we omit the diagonal and duplicate entries and store everything\n in a one-dimensional array.\n\ndesired_perplexity : float\n Desired perplexity of the joint probability distributions.\n\nverbose : int\n Verbosity level.\n\nReturns\n-------\nP : ndarray of shape (n_samples * (n_samples-1) / 2,)\n Condensed joint probability matrix." - }, - { - "name": "_joint_probabilities_nn", - "decorators": [], - "parameters": [ - { - "name": "distances", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Distances of samples to its n_neighbors nearest neighbors. All other distances are left to zero (and are not materialized in memory). Matrix should be of CSR format." - }, - { - "name": "desired_perplexity", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Desired perplexity of the joint probability distributions." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute joint probabilities p_ij from distances using just nearest\nneighbors.\n\nThis method is approximately equal to _joint_probabilities. The latter\nis O(N), but limiting the joint probability to nearest neighbors improves\nthis substantially to O(uN).\n\nParameters\n----------\ndistances : sparse matrix of shape (n_samples, n_samples)\n Distances of samples to its n_neighbors nearest neighbors. 
All other\n distances are left to zero (and are not materialized in memory).\n Matrix should be of CSR format.\n\ndesired_perplexity : float\n Desired perplexity of the joint probability distributions.\n\nverbose : int\n Verbosity level.\n\nReturns\n-------\nP : sparse matrix of shape (n_samples, n_samples)\n Condensed joint probability matrix with only nearest neighbors. Matrix\n will be of CSR format." - }, - { - "name": "_kl_divergence", - "decorators": [], - "parameters": [ - { - "name": "params", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Unraveled embedding." - }, - { - "name": "P", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Condensed joint probability matrix." - }, - { - "name": "degrees_of_freedom", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Degrees of freedom of the Student's-t distribution." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimension of the embedded space." - }, - { - "name": "skip_num_points", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "This does not compute the gradient for points with indices below `skip_num_points`. This is useful when computing transforms of new data where you'd like to keep the old data fixed." - }, - { - "name": "compute_error: bool", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If False, the kl_divergence is not computed and returns NaN." 
- }, - { - "name": "default=True", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If False, the kl_divergence is not computed and returns NaN." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "t-SNE objective function: gradient of the KL divergence\nof p_ijs and q_ijs and the absolute error.\n\nParameters\n----------\nparams : ndarray of shape (n_params,)\n Unraveled embedding.\n\nP : ndarray of shape (n_samples * (n_samples-1) / 2,)\n Condensed joint probability matrix.\n\ndegrees_of_freedom : int\n Degrees of freedom of the Student's-t distribution.\n\nn_samples : int\n Number of samples.\n\nn_components : int\n Dimension of the embedded space.\n\nskip_num_points : int, default=0\n This does not compute the gradient for points with indices below\n `skip_num_points`. This is useful when computing transforms of new\n data where you'd like to keep the old data fixed.\n\ncompute_error: bool, default=True\n If False, the kl_divergence is not computed and returns NaN.\n\nReturns\n-------\nkl_divergence : float\n Kullback-Leibler divergence of p_ij and q_ij.\n\ngrad : ndarray of shape (n_params,)\n Unraveled gradient of the Kullback-Leibler divergence with respect to\n the embedding." - }, - { - "name": "_kl_divergence_bh", - "decorators": [], - "parameters": [ - { - "name": "params", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Unraveled embedding." - }, - { - "name": "P", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sparse approximate joint probability matrix, computed only for the k nearest-neighbors and symmetrized. Matrix should be of CSR format." 
- }, - { - "name": "degrees_of_freedom", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Degrees of freedom of the Student's-t distribution." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dimension of the embedded space." - }, - { - "name": "angle", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "This is the trade-off between speed and accuracy for Barnes-Hut T-SNE. 'angle' is the angular size (referred to as theta in [3]) of a distant node as measured from a point. If this size is below 'angle' then it is used as a summary node of all points contained within it. This method is not very sensitive to changes in this parameter in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing computation time and angle greater 0.8 has quickly increasing error." - }, - { - "name": "skip_num_points", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "This does not compute the gradient for points with indices below `skip_num_points`. This is useful when computing transforms of new data where you'd like to keep the old data fixed." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." - }, - { - "name": "compute_error: bool", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If False, the kl_divergence is not computed and returns NaN." 
- }, - { - "name": "default=True", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If False, the kl_divergence is not computed and returns NaN." - }, - { - "name": "num_threads", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Number of threads used to compute the gradient. This is set here to avoid calling _openmp_effective_n_threads for each gradient step." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "t-SNE objective function: KL divergence of p_ijs and q_ijs.\n\nUses Barnes-Hut tree methods to calculate the gradient that\nruns in O(NlogN) instead of O(N^2).\n\nParameters\n----------\nparams : ndarray of shape (n_params,)\n Unraveled embedding.\n\nP : sparse matrix of shape (n_samples, n_sample)\n Sparse approximate joint probability matrix, computed only for the\n k nearest-neighbors and symmetrized. Matrix should be of CSR format.\n\ndegrees_of_freedom : int\n Degrees of freedom of the Student's-t distribution.\n\nn_samples : int\n Number of samples.\n\nn_components : int\n Dimension of the embedded space.\n\nangle : float, default=0.5\n This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.\n 'angle' is the angular size (referred to as theta in [3]) of a distant\n node as measured from a point. If this size is below 'angle' then it is\n used as a summary node of all points contained within it.\n This method is not very sensitive to changes in this parameter\n in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing\n computation time and angle greater 0.8 has quickly increasing error.\n\nskip_num_points : int, default=0\n This does not compute the gradient for points with indices below\n `skip_num_points`. 
This is useful when computing transforms of new\n data where you'd like to keep the old data fixed.\n\nverbose : int, default=False\n Verbosity level.\n\ncompute_error: bool, default=True\n If False, the kl_divergence is not computed and returns NaN.\n\nnum_threads : int, default=1\n Number of threads used to compute the gradient. This is set here to\n avoid calling _openmp_effective_n_threads for each gradient step.\n\nReturns\n-------\nkl_divergence : float\n Kullback-Leibler divergence of p_ij and q_ij.\n\ngrad : ndarray of shape (n_params,)\n Unraveled gradient of the Kullback-Leibler divergence with respect to\n the embedding." - }, - { - "name": "_gradient_descent", - "decorators": [], - "parameters": [ - { - "name": "objective", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Should return a tuple of cost and gradient for a given parameter vector. When expensive to compute, the cost can optionally be None and can be computed every n_iter_check steps using the objective_error function." - }, - { - "name": "p0", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial parameter vector." - }, - { - "name": "it", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Current number of iterations (this function will be called more than once during the optimization)." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of gradient descent iterations." - }, - { - "name": "n_iter_check", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations before evaluating the global error. If the error is sufficiently low, we abort the optimization." 
- }, - { - "name": "n_iter_without_progress", - "type": "int", - "hasDefault": true, - "default": "300", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations without progress before we abort the optimization." - }, - { - "name": "momentum", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The momentum generates a weight for previous gradients that decays exponentially." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If the learning rate is too high, the data may look like a 'ball' with any point approximately equidistant from its nearest neighbours. If the learning rate is too low, most points may look compressed in a dense cloud with few outliers." - }, - { - "name": "min_gain", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Minimum individual gain for each parameter." - }, - { - "name": "min_grad_norm", - "type": "float", - "hasDefault": true, - "default": "1e-7", - "limitation": null, - "ignored": false, - "docstring": "If the gradient norm is below this threshold, the optimization will be aborted." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." - }, - { - "name": "args", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Arguments to pass to objective function." - }, - { - "name": "kwargs", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Keyword arguments to pass to objective function." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Batch gradient descent with momentum and individual gains.\n\nParameters\n----------\nobjective : callable\n Should return a tuple of cost and gradient for a given parameter\n vector. When expensive to compute, the cost can optionally\n be None and can be computed every n_iter_check steps using\n the objective_error function.\n\np0 : array-like of shape (n_params,)\n Initial parameter vector.\n\nit : int\n Current number of iterations (this function will be called more than\n once during the optimization).\n\nn_iter : int\n Maximum number of gradient descent iterations.\n\nn_iter_check : int, default=1\n Number of iterations before evaluating the global error. If the error\n is sufficiently low, we abort the optimization.\n\nn_iter_without_progress : int, default=300\n Maximum number of iterations without progress before we abort the\n optimization.\n\nmomentum : float within (0.0, 1.0), default=0.8\n The momentum generates a weight for previous gradients that decays\n exponentially.\n\nlearning_rate : float, default=200.0\n The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\n the learning rate is too high, the data may look like a 'ball' with any\n point approximately equidistant from its nearest neighbours. If the\n learning rate is too low, most points may look compressed in a dense\n cloud with few outliers.\n\nmin_gain : float, default=0.01\n Minimum individual gain for each parameter.\n\nmin_grad_norm : float, default=1e-7\n If the gradient norm is below this threshold, the optimization will\n be aborted.\n\nverbose : int, default=0\n Verbosity level.\n\nargs : sequence, default=None\n Arguments to pass to objective function.\n\nkwargs : dict, default=None\n Keyword arguments to pass to objective function.\n\nReturns\n-------\np : ndarray of shape (n_params,)\n Optimum parameters.\n\nerror : float\n Optimum.\n\ni : int\n Last iteration." 
- }, - { - "name": "trustworthiness", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If the metric is 'precomputed' X must be a square distance matrix. Otherwise it contains a sample per row." - }, - { - "name": "X_embedded", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Embedding of the training data in low-dimensional space." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors k that will be considered." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "Which metric to use for computing pairwise distances between samples from the original input space. If metric is 'precomputed', X must be a matrix of pairwise distances or squared distances. Otherwise, see the documentation of argument metric in sklearn.pairwise.pairwise_distances for a list of available metrics. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Expresses to what extent the local structure is retained.\n\nThe trustworthiness is within [0, 1]. It is defined as\n\n.. math::\n\n T(k) = 1 - \\frac{2}{nk (2n - 3k - 1)} \\sum^n_{i=1}\n \\sum_{j \\in \\mathcal{N}_{i}^{k}} \\max(0, (r(i, j) - k))\n\nwhere for each sample i, :math:`\\mathcal{N}_{i}^{k}` are its k nearest\nneighbors in the output space, and every sample j is its :math:`r(i, j)`-th\nnearest neighbor in the input space. In other words, any unexpected nearest\nneighbors in the output space are penalised in proportion to their rank in\nthe input space.\n\n* \"Neighborhood Preservation in Nonlinear Projection Methods: An\n Experimental Study\"\n J. Venna, S. 
Kaski\n* \"Learning a Parametric Embedding by Preserving Local Structure\"\n L.J.P. van der Maaten\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n If the metric is 'precomputed' X must be a square distance\n matrix. Otherwise it contains a sample per row.\n\nX_embedded : ndarray of shape (n_samples, n_components)\n Embedding of the training data in low-dimensional space.\n\nn_neighbors : int, default=5\n Number of neighbors k that will be considered.\n\nmetric : str or callable, default='euclidean'\n Which metric to use for computing pairwise distances between samples\n from the original input space. If metric is 'precomputed', X must be a\n matrix of pairwise distances or squared distances. Otherwise, see the\n documentation of argument metric in sklearn.pairwise.pairwise_distances\n for a list of available metrics.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ntrustworthiness : float\n Trustworthiness of the low-dimensional embedding." 
- } - ] - }, - { - "name": "sklearn.manifold", - "imports": [ - "from _locally_linear import locally_linear_embedding", - "from _locally_linear import LocallyLinearEmbedding", - "from _isomap import Isomap", - "from _mds import MDS", - "from _mds import smacof", - "from _spectral_embedding import SpectralEmbedding", - "from _spectral_embedding import spectral_embedding", - "from _t_sne import TSNE", - "from _t_sne import trustworthiness" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.manifold.tests.test_isomap", - "imports": [ - "from itertools import product", - "import numpy as np", - "from numpy.testing import assert_almost_equal", - "from numpy.testing import assert_array_almost_equal", - "import pytest", - "from sklearn import datasets", - "from sklearn import manifold", - "from sklearn import neighbors", - "from sklearn import pipeline", - "from sklearn import preprocessing", - "from scipy.sparse import rand as sparse_rand" - ], - "classes": [], - "functions": [ - { - "name": "test_isomap_simple_grid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isomap_reconstruction_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_with_nearest_neighbors_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_different_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isomap_clone_bug", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.manifold.tests.test_locally_linear", - "imports": [ - "from itertools import product", - "import numpy as np", - "from numpy.testing import assert_almost_equal", - "from numpy.testing import assert_array_almost_equal", - "from scipy import linalg", - "import pytest", - "from sklearn import neighbors", - "from sklearn import manifold", - "from sklearn.manifold._locally_linear import barycenter_kneighbors_graph", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn import pipeline", - "from sklearn import datasets" - ], - "classes": [], - "functions": [ - { - "name": "test_barycenter_kneighbors_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lle_simple_grid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lle_manifold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lle_init_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_singular_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_integer_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - 
] - }, - { - "name": "sklearn.manifold.tests.test_mds", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_array_almost_equal", - "import pytest", - "from sklearn.manifold import _mds as mds", - "from sklearn.utils._testing import ignore_warnings" - ], - "classes": [], - "functions": [ - { - "name": "test_smacof", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_smacof_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_MDS", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_MDS_pairwise_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_MDS_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.manifold.tests.test_spectral_embedding", - "imports": [ - "import pytest", - "import numpy as np", - "from scipy import sparse", - "from scipy.sparse import csgraph", - "from scipy.linalg import eigh", - "from sklearn.manifold import SpectralEmbedding", - "from sklearn.manifold._spectral_embedding import _graph_is_connected", - "from sklearn.manifold._spectral_embedding import _graph_connected_component", - "from sklearn.manifold import spectral_embedding", - "from sklearn.metrics.pairwise import rbf_kernel", - "from sklearn.metrics import normalized_mutual_info_score", - "from sklearn.neighbors import NearestNeighbors", - "from sklearn.cluster import KMeans", - "from sklearn.datasets import make_blobs", - "from sklearn.utils.extmath import _deterministic_vector_sign_flip", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal" - ], - 
"classes": [], - "functions": [ - { - "name": "_assert_equal_with_sign_flipping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check array A and B are equal with possible sign flipping on\neach columns" - }, - { - "name": "test_sparse_graph_connected_component", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_two_components", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_precomputed_affinity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precomputed_nearest_neighbors_filtering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_callable_affinity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_amg_solver", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_amg_solver_failure", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_spectral_clustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_unknown_eigensolver", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_unknown_affinity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_connectivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_deterministic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_unnormalized", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_first_eigen_vector", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding_pairwise_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.manifold.tests.test_t_sne", - "imports": [ - "import sys", - "from io import StringIO", - "import numpy as np", - "from numpy.testing import assert_allclose", - "import scipy.sparse as sp", - "import pytest", - "from sklearn.neighbors import NearestNeighbors", - "from sklearn.neighbors import kneighbors_graph", - "from sklearn.exceptions import EfficiencyWarning", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import skip_if_32bit", - "from sklearn.utils import check_random_state", - "from sklearn.manifold._t_sne import _joint_probabilities", - "from sklearn.manifold._t_sne import _joint_probabilities_nn", - "from sklearn.manifold._t_sne import _kl_divergence", - "from sklearn.manifold._t_sne import _kl_divergence_bh", - "from sklearn.manifold._t_sne import _gradient_descent", - "from sklearn.manifold._t_sne import trustworthiness", - "from sklearn.manifold import TSNE", - "from sklearn.manifold import 
_barnes_hut_tsne", - "from sklearn.manifold._utils import _binary_search_perplexity", - "from sklearn.datasets import make_blobs", - "from scipy.optimize import check_grad", - "from scipy.spatial.distance import pdist", - "from scipy.spatial.distance import squareform", - "from sklearn.metrics.pairwise import pairwise_distances", - "from sklearn.metrics.pairwise import manhattan_distances", - "from sklearn.metrics.pairwise import cosine_distances", - "from scipy.sparse import csr_matrix" - ], - "classes": [], - "functions": [ - { - "name": "test_gradient_descent_stops", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_binary_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_binary_search_neighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_binary_perplexity_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_trustworthiness", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_preserve_trustworthiness_approximately", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_optimization_minimizes_kl_divergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "t-SNE should give a lower KL divergence with more iterations." 
- }, - { - "name": "test_fit_csr_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_preserve_trustworthiness_approximately_with_precomputed_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_trustworthiness_not_euclidean_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_exaggeration_too_small", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_too_few_iterations", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bad_precomputed_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_exact_no_precomputed_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_high_perplexity_precomputed_sparse_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_precomputed_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure that TSNE works identically for sparse and dense matrix" - }, - { - "name": "test_non_positive_computed_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_not_available", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_ndarray", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_ndarray_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_distance_not_available", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_method_not_available", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_square_distances_not_available", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_angle_out_of_range_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pca_initialization_not_compatible_with_precomputed_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components_range", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_exaggeration_used", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_iter_used", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_answer_gradient_two_points", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_answer_gradient_four_points", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_skip_num_points_gradient", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "_run_answer_test", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_chebyshev_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_reduction_to_one_component", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_64bit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kl_divergence_not_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_barnes_hut_angle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_iter_without_progress", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_grad_norm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_accessible_kl_divergence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_uniform_grid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure that TSNE can approximately recover a uniform 2D grid\n\nDue to ties in distances between point in X_2d_grid, this test is platform\ndependent for ``method='barnes_hut'`` due to numerical imprecision.\n\nAlso, t-SNE is not assured to converge to the right solution because bad\ninitialization 
can lead to convergence to bad local minimum (the\noptimization problem is non-convex). To avoid breaking the test too often,\nwe re-run t-SNE from the final point when the convergence is not good\nenough." - }, - { - "name": "assert_uniform_grid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bh_match_exact", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient_bh_multithread_match_sequential", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tsne_with_different_distance_metrics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure that TSNE works for different distance metrics" - }, - { - "name": "test_tsne_different_square_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tsne_square_distances_futurewarning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tsne_n_jobs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure that the n_jobs parameter doesn't impact the output" - } - ] - }, - { - "name": "sklearn.manifold.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.metrics.pairwise", - "imports": [ - "import itertools", - "from functools import partial", - "import warnings", - "import numpy as np", - "from scipy.spatial import distance", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import issparse", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "from utils.validation import _num_samples", - "from utils.validation 
import check_non_negative", - "from utils import check_array", - "from utils import gen_even_slices", - "from utils import gen_batches", - "from utils import get_chunk_n_rows", - "from utils import is_scalar_nan", - "from utils.extmath import row_norms", - "from utils.extmath import safe_sparse_dot", - "from preprocessing import normalize", - "from utils._mask import _get_mask", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from utils.fixes import sp_version", - "from utils.fixes import parse_version", - "from _pairwise_fast import _chi2_kernel_fast", - "from _pairwise_fast import _sparse_manhattan", - "from exceptions import DataConversionWarning", - "from sklearn.neighbors import DistanceMetric", - "from gaussian_process.kernels import Kernel as GPKernel" - ], - "classes": [], - "functions": [ - { - "name": "_return_float_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "1. If dtype of X and Y is float32, then dtype float32 is returned.\n2. Else dtype float is returned." - }, - { - "name": "check_pairwise_arrays", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "precomputed", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "True if X is to be treated as precomputed distances to the samples in Y." - }, - { - "name": "dtype", - "type": "Union[List, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data type required for X and Y. If None, the dtype will be an appropriate float type selected by _return_float_dtype. .. 
versionadded:: 0.18" - }, - { - "name": "accept_sparse", - "type": "Union[bool, str]", - "hasDefault": true, - "default": "'csr'", - "limitation": null, - "ignored": false, - "docstring": "String[s] representing allowed sparse matrix formats, such as 'csc', 'csr', etc. If the input is sparse but not in the allowed format, it will be converted to the first listed format. True allows the input to be any format. False means that a sparse matrix input will raise an error." - }, - { - "name": "force_all_finite", - "type": "Union[Literal['allow-nan'], bool]", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise an error on np.inf, np.nan, pd.NA in array. The possibilities are: - True: Force all values of array to be finite. - False: accepts np.inf, np.nan, pd.NA in array. - 'allow-nan': accepts only np.nan and pd.NA values in array. Values cannot be infinite. .. versionadded:: 0.22 ``force_all_finite`` accepts the string ``'allow-nan'``. .. versionchanged:: 0.23 Accepts `pd.NA` and converts it into `np.nan`." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether a forced copy will be triggered. If copy=False, a copy might be triggered by a conversion. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set X and Y appropriately and checks inputs.\n\nIf Y is None, it is set as a pointer to X (i.e. not a copy).\nIf Y is given, this does not happen.\nAll distance metrics should use this function first to assert that the\ngiven parameters are correct and safe to use.\n\nSpecifically, this function first ensures that both X and Y are arrays,\nthen checks that they are at least two dimensional while ensuring that\ntheir elements are floats (or dtype if provided). 
Finally, the function\nchecks that the size of the second dimension of the two arrays is equal, or\nthe equivalent check for a precomputed distance matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n\nprecomputed : bool, default=False\n True if X is to be treated as precomputed distances to the samples in\n Y.\n\ndtype : str, type, list of type, default=None\n Data type required for X and Y. If None, the dtype will be an\n appropriate float type selected by _return_float_dtype.\n\n .. versionadded:: 0.18\n\naccept_sparse : str, bool or list/tuple of str, default='csr'\n String[s] representing allowed sparse matrix formats, such as 'csc',\n 'csr', etc. If the input is sparse but not in the allowed format,\n it will be converted to the first listed format. True allows the input\n to be any format. False means that a sparse matrix input will\n raise an error.\n\nforce_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in array. The\n possibilities are:\n\n - True: Force all values of array to be finite.\n - False: accepts np.inf, np.nan, pd.NA in array.\n - 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n cannot be infinite.\n\n .. versionadded:: 0.22\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`.\n\ncopy : bool, default=False\n Whether a forced copy will be triggered. If copy=False, a copy might\n be triggered by a conversion.\n\n .. versionadded:: 0.22\n\nReturns\n-------\nsafe_X : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n An array equal to X, guaranteed to be a numpy array.\n\nsafe_Y : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n An array equal to Y if Y was not None, guaranteed to be a numpy array.\n If Y was None, safe_Y will be a pointer to X." 
- }, - { - "name": "check_paired_arrays", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set X and Y appropriately and checks inputs for paired distances.\n\nAll paired distance metrics should use this function first to assert that\nthe given parameters are correct and safe to use.\n\nSpecifically, this function first ensures that both X and Y are arrays,\nthen checks that they are at least two dimensional while ensuring that\ntheir elements are floats. Finally, the function checks that the size\nof the dimensions of the two arrays are equal.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n\nReturns\n-------\nsafe_X : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n An array equal to X, guaranteed to be a numpy array.\n\nsafe_Y : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n An array equal to Y if Y was not None, guaranteed to be a numpy array.\n If Y was None, safe_Y will be a pointer to X." 
- }, - { - "name": "euclidean_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y_norm_squared", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pre-computed dot-products of vectors in Y (e.g., ``(Y**2).sum(axis=1)``) May be ignored in some cases, see the note below." - }, - { - "name": "squared", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Return squared Euclidean distances." - }, - { - "name": "X_norm_squared", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pre-computed dot-products of vectors in X (e.g., ``(X**2).sum(axis=1)``) May be ignored in some cases, see the note below." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Considering the rows of X (and Y=X) as vectors, compute the\ndistance matrix between each pair of vectors.\n\nFor efficiency reasons, the euclidean distance between a pair of row\nvector x and y is computed as::\n\n dist(x, y) = sqrt(dot(x, x) - 2 * dot(x, y) + dot(y, y))\n\nThis formulation has two advantages over other ways of computing distances.\nFirst, it is computationally efficient when dealing with sparse data.\nSecond, if one argument varies but the other remains unchanged, then\n`dot(x, x)` and/or `dot(y, y)` can be pre-computed.\n\nHowever, this is not the most precise way of doing this computation,\nbecause this equation potentially suffers from \"catastrophic cancellation\".\nAlso, the distance matrix returned by this function may not be exactly\nsymmetric as required by, e.g., ``scipy.spatial.distance`` functions.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features), default=None\n\nY_norm_squared : array-like of shape (n_samples_Y,), default=None\n Pre-computed dot-products of vectors in Y (e.g.,\n ``(Y**2).sum(axis=1)``)\n May be ignored in some cases, see the note below.\n\nsquared : bool, default=False\n Return squared Euclidean distances.\n\nX_norm_squared : array-like of shape (n_samples,), default=None\n Pre-computed dot-products of vectors in X (e.g.,\n ``(X**2).sum(axis=1)``)\n May be ignored in some cases, see the note below.\n\nNotes\n-----\nTo achieve better accuracy, `X_norm_squared`\u00a0and `Y_norm_squared` may be\nunused if they are passed as ``float32``.\n\nReturns\n-------\ndistances : ndarray of shape (n_samples_X, n_samples_Y)\n\nSee Also\n--------\npaired_distances : Distances betweens pairs of elements of X and Y.\n\nExamples\n--------\n>>> from sklearn.metrics.pairwise import euclidean_distances\n>>> X = 
[[0, 1], [1, 1]]\n>>> # distance between rows of X\n>>> euclidean_distances(X, X)\narray([[0., 1.],\n [1., 0.]])\n>>> # get distance to origin\n>>> euclidean_distances(X, [[0, 0]])\narray([[1. ],\n [1.41421356]])" - }, - { - "name": "nan_euclidean_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "squared", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Return squared Euclidean distances." - }, - { - "name": "missing_values", - "type": "int", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Representation of missing value." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Make and use a deep copy of X and Y (if Y exists)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate the euclidean distances in the presence of missing values.\n\nCompute the euclidean distance between each pair of samples in X and Y,\nwhere Y=X is assumed if Y=None. When calculating the distance between a\npair of samples, this formulation ignores feature coordinates with a\nmissing value in either sample and scales up the weight of the remaining\ncoordinates:\n\n dist(x,y) = sqrt(weight * sq. distance from present coordinates)\n where,\n weight = Total # of coordinates / # of present coordinates\n\nFor example, the distance between ``[3, na, na, 6]`` and ``[1, na, 4, 5]``\nis:\n\n .. 
math::\n \\sqrt{\\frac{4}{2}((3-1)^2 + (6-5)^2)}\n\nIf all the coordinates are missing or if there are no common present\ncoordinates then NaN is returned for that pair.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nX : array-like of shape=(n_samples_X, n_features)\n\nY : array-like of shape=(n_samples_Y, n_features), default=None\n\nsquared : bool, default=False\n Return squared Euclidean distances.\n\nmissing_values : np.nan or int, default=np.nan\n Representation of missing value.\n\ncopy : bool, default=True\n Make and use a deep copy of X and Y (if Y exists).\n\nReturns\n-------\ndistances : ndarray of shape (n_samples_X, n_samples_Y)\n\nSee Also\n--------\npaired_distances : Distances between pairs of elements of X and Y.\n\nExamples\n--------\n>>> from sklearn.metrics.pairwise import nan_euclidean_distances\n>>> nan = float(\"NaN\")\n>>> X = [[0, 1], [1, nan]]\n>>> nan_euclidean_distances(X, X) # distance between rows of X\narray([[0. , 1.41421356],\n [1.41421356, 0. ]])\n\n>>> # get distance to origin\n>>> nan_euclidean_distances(X, [[0, 0]])\narray([[1. ],\n [1.41421356]])\n\nReferences\n----------\n* John K. Dixon, \"Pattern Recognition with Partly Missing Data\",\n IEEE Transactions on Systems, Man, and Cybernetics, Volume: 9, Issue:\n 10, pp. 617 - 621, Oct. 1979.\n http://ieeexplore.ieee.org/abstract/document/4310090/" - }, - { - "name": "_euclidean_distances_upcast", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Euclidean distances between X and Y.\n\nAssumes X and Y have float32 dtype.\nAssumes XX and YY have float64 dtype or are None.\n\nX and Y are upcast to float64 by chunks, which size is chosen to limit\nmemory increase by approximately 10% (at least 10MiB)." 
- }, - { - "name": "_argmin_min_reduce", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "pairwise_distances_argmin_min", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array containing points." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array containing points." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Axis along which the argmin and distances are to be computed." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "Metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. If metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays as input and return one value indicating the distance between them. This works for Scipy's metrics, but is less efficient than passing the metric name as a string. Distance matrices are not supported. Valid values for metric are: - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'] - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'] See the documentation for scipy.spatial.distance for details on these metrics." 
- }, - { - "name": "metric_kwargs", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Keyword arguments to pass to specified metric function." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance). The minimal distances are\nalso returned.\n\nThis is mostly equivalent to calling:\n\n (pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis),\n pairwise_distances(X, Y=Y, metric=metric).min(axis=axis))\n\nbut uses much less memory, and is faster for large arrays.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n Array containing points.\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n Array containing points.\n\naxis : int, default=1\n Axis along which the argmin and distances are to be computed.\n\nmetric : str or callable, default='euclidean'\n Metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. 
This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\nmetric_kwargs : dict, default=None\n Keyword arguments to pass to specified metric function.\n\nReturns\n-------\nargmin : ndarray\n Y[argmin[i], :] is the row in Y that is closest to X[i, :].\n\ndistances : ndarray\n distances[i] is the distance between the i-th row in X and the\n argmin[i]-th row in Y.\n\nSee Also\n--------\nsklearn.metrics.pairwise_distances\nsklearn.metrics.pairwise_distances_argmin" - }, - { - "name": "pairwise_distances_argmin", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array containing points." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Arrays containing points." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Axis along which the argmin and distances are to be computed." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "\"euclidean\"", - "limitation": null, - "ignored": false, - "docstring": "Metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. 
If metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays as input and return one value indicating the distance between them. This works for Scipy's metrics, but is less efficient than passing the metric name as a string. Distance matrices are not supported. Valid values for metric are: - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'] - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'] See the documentation for scipy.spatial.distance for details on these metrics." - }, - { - "name": "metric_kwargs", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Keyword arguments to pass to specified metric function." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance).\n\nThis is mostly equivalent to calling:\n\n pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis)\n\nbut uses much less memory, and is faster for large arrays.\n\nThis function works with dense 2D arrays only.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n Array containing points.\n\nY : array-like of shape (n_samples_Y, n_features)\n Arrays containing points.\n\naxis : int, default=1\n Axis along which the argmin and distances are to be computed.\n\nmetric : str or callable, default=\"euclidean\"\n Metric to use for distance computation. 
Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\nmetric_kwargs : dict, default=None\n Keyword arguments to pass to specified metric function.\n\nReturns\n-------\nargmin : numpy.ndarray\n Y[argmin[i], :] is the row in Y that is closest to X[i, :].\n\nSee Also\n--------\nsklearn.metrics.pairwise_distances\nsklearn.metrics.pairwise_distances_argmin_min" - }, - { - "name": "haversine_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Haversine distance between samples in X and Y.\n\nThe Haversine (or great circle) distance is the angular distance between\ntwo points on the surface of a sphere. The first coordinate of each point\nis assumed to be the latitude, the second is the longitude, given\nin radians. 
The dimension of the data must be 2.\n\n.. math::\n D(x, y) = 2\\arcsin[\\sqrt{\\sin^2((x1 - y1) / 2)\n + \\cos(x1)\\cos(y1)\\sin^2((x2 - y2) / 2)}]\n\nParameters\n----------\nX : array-like of shape (n_samples_X, 2)\n\nY : array-like of shape (n_samples_Y, 2), default=None\n\nReturns\n-------\ndistance : ndarray of shape (n_samples_X, n_samples_Y)\n\nNotes\n-----\nAs the Earth is nearly spherical, the haversine formula provides a good\napproximation of the distance between two points of the Earth surface, with\na less than 1% error on average.\n\nExamples\n--------\nWe want to calculate the distance between the Ezeiza Airport\n(Buenos Aires, Argentina) and the Charles de Gaulle Airport (Paris,\nFrance).\n\n>>> from sklearn.metrics.pairwise import haversine_distances\n>>> from math import radians\n>>> bsas = [-34.83333, -58.5166646]\n>>> paris = [49.0083899664, 2.53844117956]\n>>> bsas_in_radians = [radians(_) for _ in bsas]\n>>> paris_in_radians = [radians(_) for _ in paris]\n>>> result = haversine_distances([bsas_in_radians, paris_in_radians])\n>>> result * 6371000/1000 # multiply by Earth radius to get kilometers\narray([[ 0. , 11099.54035582],\n [11099.54035582, 0. ]])" - }, - { - "name": "manhattan_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "sum_over_features", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True the function returns the pairwise distance matrix else it returns the componentwise L1 pairwise-distances. Not supported for sparse matrix inputs." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the L1 distances between the vectors in X and Y.\n\nWith sum_over_features equal to False it returns the componentwise\ndistances.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n\nY : array-like of shape (n_samples_Y, n_features), default=None\n\nsum_over_features : bool, default=True\n If True the function returns the pairwise distance matrix\n else it returns the componentwise L1 pairwise-distances.\n Not supported for sparse matrix inputs.\n\nReturns\n-------\nD : ndarray of shape (n_samples_X * n_samples_Y, n_features) or (n_samples_X, n_samples_Y)\n If sum_over_features is False shape is\n (n_samples_X * n_samples_Y, n_features) and D contains the\n componentwise L1 pairwise-distances (ie. absolute difference),\n else shape is (n_samples_X, n_samples_Y) and D contains\n the pairwise L1 distances.\n\nNotes\n--------\nWhen X and/or Y are CSR sparse matrices and they are not already\nin canonical format, this function modifies them in-place to\nmake them canonical.\n\nExamples\n--------\n>>> from sklearn.metrics.pairwise import manhattan_distances\n>>> manhattan_distances([[3]], [[3]])\narray([[0.]])\n>>> manhattan_distances([[3]], [[2]])\narray([[1.]])\n>>> manhattan_distances([[2]], [[3]])\narray([[1.]])\n>>> manhattan_distances([[1, 2], [3, 4]], [[1, 2], [0, 3]])\narray([[0., 2.],\n [4., 4.]])\n>>> import numpy as np\n>>> X = np.ones((1, 2))\n>>> y = np.full((2, 2), 2.)\n>>> manhattan_distances(X, y, sum_over_features=False)\narray([[1., 1.],\n [1., 1.]])" - }, - { - "name": "cosine_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix `X`." 
- }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix `Y`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute cosine distance between samples in X and Y.\n\nCosine distance is defined as 1.0 minus the cosine similarity.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n Matrix `X`.\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features), default=None\n Matrix `Y`.\n\nReturns\n-------\ndistance matrix : ndarray of shape (n_samples_X, n_samples_Y)\n\nSee Also\n--------\ncosine_similarity\nscipy.spatial.distance.cosine : Dense matrices only." - }, - { - "name": "paired_euclidean_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the paired euclidean distances between X and Y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nY : array-like of shape (n_samples, n_features)\n\nReturns\n-------\ndistances : ndarray of shape (n_samples,)" - }, - { - "name": "paired_manhattan_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the L1 
distances between the vectors in X and Y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nY : array-like of shape (n_samples, n_features)\n\nReturns\n-------\ndistances : ndarray of shape (n_samples,)" - }, - { - "name": "paired_cosine_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the paired cosine distances between X and Y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nY : array-like of shape (n_samples, n_features)\n\nReturns\n-------\ndistances : ndarray of shape (n_samples,)\n\nNotes\n-----\nThe cosine distance is equivalent to the half the squared\neuclidean distance if each sample is normalized to unit norm." - }, - { - "name": "paired_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array 1 for distance computation." - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array 2 for distance computation." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "\"euclidean\"", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options specified in PAIRED_DISTANCES, including \"euclidean\", \"manhattan\", or \"cosine\". 
Alternatively, if metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays from X as input and return a value indicating the distance between them." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the paired distances between X and Y.\n\nComputes the distances between (X[0], Y[0]), (X[1], Y[1]), etc...\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Array 1 for distance computation.\n\nY : ndarray of shape (n_samples, n_features)\n Array 2 for distance computation.\n\nmetric : str or callable, default=\"euclidean\"\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n specified in PAIRED_DISTANCES, including \"euclidean\",\n \"manhattan\", or \"cosine\".\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. 
The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\nReturns\n-------\ndistances : ndarray of shape (n_samples,)\n\nSee Also\n--------\npairwise_distances : Computes the distance between every pair of samples.\n\nExamples\n--------\n>>> from sklearn.metrics.pairwise import paired_distances\n>>> X = [[0, 1], [1, 1]]\n>>> Y = [[0, 1], [2, 1]]\n>>> paired_distances(X, Y)\narray([0., 1.])" - }, - { - "name": "linear_kernel", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "dense_output", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to return dense output even when the input is sparse. If ``False``, the output is sparse if both input arrays are sparse. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the linear kernel between X and Y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ndense_output : bool, default=True\n Whether to return dense output even when the input is sparse. If\n ``False``, the output is sparse if both input arrays are sparse.\n\n .. 
versionadded:: 0.20\n\nReturns\n-------\nGram matrix : ndarray of shape (n_samples_X, n_samples_Y)" - }, - { - "name": "polynomial_kernel", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If None, defaults to 1.0 / n_features." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the polynomial kernel between X and Y::\n\n K(X, Y) = (gamma + coef0)^degree\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ndegree : int, default=3\n\ngamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\ncoef0 : float, default=1\n\nReturns\n-------\nGram matrix : ndarray of shape (n_samples_X, n_samples_Y)" - }, - { - "name": "sigmoid_kernel", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - 
"docstring": "If None, defaults to 1.0 / n_features." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the sigmoid kernel between X and Y::\n\n K(X, Y) = tanh(gamma + coef0)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ngamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\ncoef0 : float, default=1\n\nReturns\n-------\nGram matrix : ndarray of shape (n_samples_X, n_samples_Y)" - }, - { - "name": "rbf_kernel", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If None, defaults to 1.0 / n_features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the rbf (gaussian) kernel between X and Y::\n\n K(x, y) = exp(-gamma ||x-y||^2)\n\nfor each pair of rows x in X and y in Y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ngamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\nReturns\n-------\nkernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)" - }, - { - "name": "laplacian_kernel", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If None, defaults to 1.0 / n_features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the laplacian kernel between X and Y.\n\nThe laplacian kernel is defined as::\n\n K(x, y) = exp(-gamma ||x-y||_1)\n\nfor each pair of rows x in X and y in Y.\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ngamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\nReturns\n-------\nkernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)" - }, - { - "name": "cosine_similarity", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." 
- }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. If ``None``, the output will be the pairwise similarities between all samples in ``X``." - }, - { - "name": "dense_output", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to return dense output even when the input is sparse. If ``False``, the output is sparse if both input arrays are sparse. .. versionadded:: 0.17 parameter ``dense_output`` for dense output." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute cosine similarity between samples in X and Y.\n\nCosine similarity, or the cosine kernel, computes similarity as the\nnormalized dot product of X and Y:\n\n K(X, Y) = / (||X||*||Y||)\n\nOn L2-normalized data, this function is equivalent to linear_kernel.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples_X, n_features)\n Input data.\n\nY : {ndarray, sparse matrix} of shape (n_samples_Y, n_features), default=None\n Input data. If ``None``, the output will be the pairwise\n similarities between all samples in ``X``.\n\ndense_output : bool, default=True\n Whether to return dense output even when the input is sparse. If\n ``False``, the output is sparse if both input arrays are sparse.\n\n .. 
versionadded:: 0.17\n parameter ``dense_output`` for dense output.\n\nReturns\n-------\nkernel matrix : ndarray of shape (n_samples_X, n_samples_Y)" - }, - { - "name": "additive_chi2_kernel", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the additive chi-squared kernel between observations in X and\nY.\n\nThe chi-squared kernel is computed between each pair of rows in X and Y. X\nand Y have to be non-negative. This kernel is most commonly applied to\nhistograms.\n\nThe chi-squared kernel is given by::\n\n k(x, y) = -Sum [(x - y)^2 / (x + y)]\n\nIt can be interpreted as a weighted difference per entry.\n\nRead more in the :ref:`User Guide `.\n\nNotes\n-----\nAs the negative of a distance, this kernel is only conditionally positive\ndefinite.\n\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\nReturns\n-------\nkernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)\n\nSee Also\n--------\nchi2_kernel : The exponentiated version of the kernel, which is usually\n preferable.\nsklearn.kernel_approximation.AdditiveChi2Sampler : A Fourier approximation\n to this kernel.\n\nReferences\n----------\n* Zhang, J. and Marszalek, M. and Lazebnik, S. 
and Schmid, C.\n Local features and kernels for classification of texture and object\n categories: A comprehensive study\n International Journal of Computer Vision 2007\n https://research.microsoft.com/en-us/um/people/manik/projects/trade-off/papers/ZhangIJCV06.pdf" - }, - { - "name": "chi2_kernel", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Scaling parameter of the chi2 kernel." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the exponential chi-squared kernel X and Y.\n\nThe chi-squared kernel is computed between each pair of rows in X and Y. X\nand Y have to be non-negative. This kernel is most commonly applied to\nhistograms.\n\nThe chi-squared kernel is given by::\n\n k(x, y) = exp(-gamma Sum [(x - y)^2 / (x + y)])\n\nIt can be interpreted as a weighted difference per entry.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ngamma : float, default=1.\n Scaling parameter of the chi2 kernel.\n\nReturns\n-------\nkernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)\n\nSee Also\n--------\nadditive_chi2_kernel : The additive version of this kernel.\nsklearn.kernel_approximation.AdditiveChi2Sampler : A Fourier approximation\n to the additive version of this kernel.\n\nReferences\n----------\n* Zhang, J. and Marszalek, M. and Lazebnik, S. 
and Schmid, C.\n Local features and kernels for classification of texture and object\n categories: A comprehensive study\n International Journal of Computer Vision 2007\n https://research.microsoft.com/en-us/um/people/manik/projects/trade-off/papers/ZhangIJCV06.pdf" - }, - { - "name": "distance_metrics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Valid metrics for pairwise_distances.\n\nThis function simply returns the valid pairwise distance metrics.\nIt exists to allow for a description of the mapping for\neach of the valid strings.\n\nThe valid distance metrics, and the function they map to, are:\n\n=============== ========================================\nmetric Function\n=============== ========================================\n'cityblock' metrics.pairwise.manhattan_distances\n'cosine' metrics.pairwise.cosine_distances\n'euclidean' metrics.pairwise.euclidean_distances\n'haversine' metrics.pairwise.haversine_distances\n'l1' metrics.pairwise.manhattan_distances\n'l2' metrics.pairwise.euclidean_distances\n'manhattan' metrics.pairwise.manhattan_distances\n'nan_euclidean' metrics.pairwise.nan_euclidean_distances\n=============== ========================================\n\nRead more in the :ref:`User Guide `." - }, - { - "name": "_dist_wrapper", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Write in-place to a slice of a distance matrix." - }, - { - "name": "_parallel_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Break the pairwise matrix in n_jobs even slices\nand compute them in parallel." 
- }, - { - "name": "_pairwise_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Handle the callable case for pairwise_{distances,kernels}.\n " - }, - { - "name": "_check_chunk_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Checks chunk is a sequence of expected size or a tuple of same.\n " - }, - { - "name": "_precompute_metric_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Precompute data-derived metric parameters if not provided.\n " - }, - { - "name": "pairwise_distances_chunked", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of pairwise distances between samples, or a feature array. The shape the array should be (n_samples_X, n_samples_X) if metric='precomputed' and (n_samples_X, n_features) otherwise." - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An optional second feature array. Only allowed if metric != \"precomputed\"." - }, - { - "name": "reduce_func", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The function which is applied on each chunk of the distance matrix, reducing it to needed values. ``reduce_func(D_chunk, start)`` is called repeatedly, where ``D_chunk`` is a contiguous vertical slice of the pairwise distance matrix, starting at row ``start``. It should return one of: None; an array, a list, or a sparse matrix of length ``D_chunk.shape[0]``; or a tuple of such objects. Returning None is useful for in-place operations, rather than reductions. If None, pairwise_distances_chunked returns a generator of vertical chunks of the distance matrix." 
- }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options allowed by scipy.spatial.distance.pdist for its metric parameter, or a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS. If metric is \"precomputed\", X is assumed to be a distance matrix. Alternatively, if metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays from X as input and return a value indicating the distance between them." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "working_memory", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sought maximum memory for temporary distance matrix chunks. When None (default), the value of ``sklearn.get_config()['working_memory']`` is used." - }, - { - "name": "`**kwds`", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Any further parameters are passed directly to the distance function. If using a scipy.spatial.distance metric, the parameters are still metric dependent. See the scipy docs for usage examples." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a distance matrix chunk by chunk with optional reduction.\n\nIn cases where not all of a pairwise distance matrix needs to be stored at\nonce, this is used to calculate pairwise distances in\n``working_memory``-sized chunks. If ``reduce_func`` is given, it is run\non each chunk and its return values are concatenated into lists, arrays\nor sparse matrices.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_features)\n Array of pairwise distances between samples, or a feature array.\n The shape the array should be (n_samples_X, n_samples_X) if\n metric='precomputed' and (n_samples_X, n_features) otherwise.\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n An optional second feature array. Only allowed if\n metric != \"precomputed\".\n\nreduce_func : callable, default=None\n The function which is applied on each chunk of the distance matrix,\n reducing it to needed values. ``reduce_func(D_chunk, start)``\n is called repeatedly, where ``D_chunk`` is a contiguous vertical\n slice of the pairwise distance matrix, starting at row ``start``.\n It should return one of: None; an array, a list, or a sparse matrix\n of length ``D_chunk.shape[0]``; or a tuple of such objects. Returning\n None is useful for in-place operations, rather than reductions.\n\n If None, pairwise_distances_chunked returns a generator of vertical\n chunks of the distance matrix.\n\nmetric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. 
If metric is a string, it must be one of the options\n allowed by scipy.spatial.distance.pdist for its metric parameter, or\n a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a distance matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nworking_memory : int, default=None\n The sought maximum memory for temporary distance matrix chunks.\n When None (default), the value of\n ``sklearn.get_config()['working_memory']`` is used.\n\n`**kwds` : optional keyword parameters\n Any further parameters are passed directly to the distance function.\n If using a scipy.spatial.distance metric, the parameters are still\n metric dependent. See the scipy docs for usage examples.\n\nYields\n------\nD_chunk : {ndarray, sparse matrix}\n A contiguous slice of distance matrix, optionally processed by\n ``reduce_func``.\n\nExamples\n--------\nWithout reduce_func:\n\n>>> import numpy as np\n>>> from sklearn.metrics import pairwise_distances_chunked\n>>> X = np.random.RandomState(0).rand(5, 3)\n>>> D_chunk = next(pairwise_distances_chunked(X))\n>>> D_chunk\narray([[0. ..., 0.29..., 0.41..., 0.19..., 0.57...],\n [0.29..., 0. ..., 0.57..., 0.41..., 0.76...],\n [0.41..., 0.57..., 0. ..., 0.44..., 0.90...],\n [0.19..., 0.41..., 0.44..., 0. ..., 0.51...],\n [0.57..., 0.76..., 0.90..., 0.51..., 0. 
...]])\n\nRetrieve all neighbors and average distance within radius r:\n\n>>> r = .2\n>>> def reduce_func(D_chunk, start):\n... neigh = [np.flatnonzero(d < r) for d in D_chunk]\n... avg_dist = (D_chunk * (D_chunk < r)).mean(axis=1)\n... return neigh, avg_dist\n>>> gen = pairwise_distances_chunked(X, reduce_func=reduce_func)\n>>> neigh, avg_dist = next(gen)\n>>> neigh\n[array([0, 3]), array([1]), array([2]), array([0, 3]), array([4])]\n>>> avg_dist\narray([0.039..., 0. , 0. , 0.039..., 0. ])\n\nWhere r is defined per sample, we need to make use of ``start``:\n\n>>> r = [.2, .4, .4, .3, .1]\n>>> def reduce_func(D_chunk, start):\n... neigh = [np.flatnonzero(d < r[i])\n... for i, d in enumerate(D_chunk, start)]\n... return neigh\n>>> neigh = next(pairwise_distances_chunked(X, reduce_func=reduce_func))\n>>> neigh\n[array([0, 3]), array([0, 1]), array([2]), array([0, 3]), array([4])]\n\nForce row-by-row generation by reducing ``working_memory``:\n\n>>> gen = pairwise_distances_chunked(X, reduce_func=reduce_func,\n... working_memory=0)\n>>> next(gen)\n[array([0, 3])]\n>>> next(gen)\n[array([0, 1])]" - }, - { - "name": "pairwise_distances", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of pairwise distances between samples, or a feature array. The shape of the array should be (n_samples_X, n_samples_X) if metric == \"precomputed\" and (n_samples_X, n_features) otherwise." - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An optional second feature array. Only allowed if metric != \"precomputed\"." 
- }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options allowed by scipy.spatial.distance.pdist for its metric parameter, or a metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``. If metric is \"precomputed\", X is assumed to be a distance matrix. Alternatively, if metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays from X as input and return a value indicating the distance between them." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "force_all_finite", - "type": "Union[Literal['allow-nan'], bool]", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise an error on np.inf, np.nan, pd.NA in array. Ignored for a metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``. The possibilities are: - True: Force all values of array to be finite. - False: accepts np.inf, np.nan, pd.NA in array. - 'allow-nan': accepts only np.nan and pd.NA values in array. Values cannot be infinite. .. versionadded:: 0.22 ``force_all_finite`` accepts the string ``'allow-nan'``. .. versionchanged:: 0.23 Accepts `pd.NA` and converts it into `np.nan`." 
- }, - { - "name": "**kwds", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Any further parameters are passed directly to the distance function. If using a scipy.spatial.distance metric, the parameters are still metric dependent. See the scipy docs for usage examples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the distance matrix from a vector array X and optional Y.\n\nThis method takes either a vector array or a distance matrix, and returns\na distance matrix. If the input is a vector array, the distances are\ncomputed. If the input is a distances matrix, it is returned instead.\n\nThis method provides a safe way to take a distance matrix as input, while\npreserving compatibility with many other algorithms that take a vector\narray.\n\nIf Y is given (default is None), then the returned matrix is the pairwise\ndistance between the arrays from both X and Y.\n\nValid values for metric are:\n\n- From scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']. These metrics support sparse matrix\n inputs.\n ['nan_euclidean'] but it does not yet support sparse matrices.\n\n- From scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',\n 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',\n 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']\n See the documentation for scipy.spatial.distance for details on these\n metrics. These metrics do not support sparse matrix inputs.\n\nNote that in the case of 'cityblock', 'cosine' and 'euclidean' (which are\nvalid scipy.spatial.distance metrics), the scikit-learn implementation\nwill be used, which is faster and has support for sparse matrices (except\nfor 'cityblock'). 
For a verbose description of the metrics from\nscikit-learn, see the __doc__ of the sklearn.pairwise.distance_metrics\nfunction.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_features)\n Array of pairwise distances between samples, or a feature array.\n The shape of the array should be (n_samples_X, n_samples_X) if\n metric == \"precomputed\" and (n_samples_X, n_features) otherwise.\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n An optional second feature array. Only allowed if\n metric != \"precomputed\".\n\nmetric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by scipy.spatial.distance.pdist for its metric parameter, or\n a metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``.\n If metric is \"precomputed\", X is assumed to be a distance matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nforce_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in array. Ignored\n for a metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``. The\n possibilities are:\n\n - True: Force all values of array to be finite.\n - False: accepts np.inf, np.nan, pd.NA in array.\n - 'allow-nan': accepts only np.nan and pd.NA values in array. 
Values\n cannot be infinite.\n\n .. versionadded:: 0.22\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`.\n\n**kwds : optional keyword parameters\n Any further parameters are passed directly to the distance function.\n If using a scipy.spatial.distance metric, the parameters are still\n metric dependent. See the scipy docs for usage examples.\n\nReturns\n-------\nD : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_samples_Y)\n A distance matrix D such that D_{i, j} is the distance between the\n ith and jth vectors of the given matrix X, if Y is None.\n If Y is not None, then D_{i, j} is the distance between the ith array\n from X and the jth array from Y.\n\nSee Also\n--------\npairwise_distances_chunked : Performs the same calculation as this\n function, but returns a generator of chunks of the distance matrix, in\n order to limit memory usage.\npaired_distances : Computes the distances between corresponding elements\n of two arrays." 
- }, - { - "name": "kernel_metrics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Valid metrics for pairwise_kernels.\n\nThis function simply returns the valid pairwise distance metrics.\nIt exists, however, to allow for a verbose description of the mapping for\neach of the valid strings.\n\nThe valid distance metrics, and the function they map to, are:\n =============== ========================================\n metric Function\n =============== ========================================\n 'additive_chi2' sklearn.pairwise.additive_chi2_kernel\n 'chi2' sklearn.pairwise.chi2_kernel\n 'linear' sklearn.pairwise.linear_kernel\n 'poly' sklearn.pairwise.polynomial_kernel\n 'polynomial' sklearn.pairwise.polynomial_kernel\n 'rbf' sklearn.pairwise.rbf_kernel\n 'laplacian' sklearn.pairwise.laplacian_kernel\n 'sigmoid' sklearn.pairwise.sigmoid_kernel\n 'cosine' sklearn.pairwise.cosine_similarity\n =============== ========================================\n\nRead more in the :ref:`User Guide `." - }, - { - "name": "pairwise_kernels", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of pairwise kernels between samples, or a feature array. The shape of the array should be (n_samples_X, n_samples_X) if metric == \"precomputed\" and (n_samples_X, n_features) otherwise." - }, - { - "name": "Y", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A second feature array only if X has shape (n_samples_X, n_features)." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "\"linear\"", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating kernel between instances in a feature array. 
If metric is a string, it must be one of the metrics in pairwise.PAIRWISE_KERNEL_FUNCTIONS. If metric is \"precomputed\", X is assumed to be a kernel matrix. Alternatively, if metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two rows from X as input and return the corresponding kernel value as a single number. This means that callables from :mod:`sklearn.metrics.pairwise` are not allowed, as they operate on matrices, not single samples. Use the string identifying the kernel instead." - }, - { - "name": "filter_params", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to filter invalid parameters or not." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "**kwds", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Any further parameters are passed directly to the kernel function." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the kernel between arrays X and optional array Y.\n\nThis method takes either a vector array or a kernel matrix, and returns\na kernel matrix. If the input is a vector array, the kernels are\ncomputed. 
If the input is a kernel matrix, it is returned instead.\n\nThis method provides a safe way to take a kernel matrix as input, while\npreserving compatibility with many other algorithms that take a vector\narray.\n\nIf Y is given (default is None), then the returned matrix is the pairwise\nkernel between the arrays from both X and Y.\n\nValid values for metric are:\n ['additive_chi2', 'chi2', 'linear', 'poly', 'polynomial', 'rbf',\n 'laplacian', 'sigmoid', 'cosine']\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_features)\n Array of pairwise kernels between samples, or a feature array.\n The shape of the array should be (n_samples_X, n_samples_X) if\n metric == \"precomputed\" and (n_samples_X, n_features) otherwise.\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n A second feature array only if X has shape (n_samples_X, n_features).\n\nmetric : str or callable, default=\"linear\"\n The metric to use when calculating kernel between instances in a\n feature array. If metric is a string, it must be one of the metrics\n in pairwise.PAIRWISE_KERNEL_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a kernel matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two rows from X as input and return the corresponding\n kernel value as a single number. This means that callables from\n :mod:`sklearn.metrics.pairwise` are not allowed, as they operate on\n matrices, not single samples. Use the string identifying the kernel\n instead.\n\nfilter_params : bool, default=False\n Whether to filter invalid parameters or not.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. 
This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n**kwds : optional keyword parameters\n Any further parameters are passed directly to the kernel function.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_samples_Y)\n A kernel matrix K such that K_{i, j} is the kernel between the\n ith and jth vectors of the given matrix X, if Y is None.\n If Y is not None, then K_{i, j} is the kernel between the ith array\n from X and the jth array from Y.\n\nNotes\n-----\nIf metric is 'precomputed', Y is ignored and X is returned." - } - ] - }, - { - "name": "sklearn.metrics.setup", - "imports": [ - "import os", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics._base", - "imports": [ - "from itertools import combinations", - "import numpy as np", - "from utils import check_array", - "from utils import check_consistent_length", - "from utils.multiclass import type_of_target" - ], - "classes": [], - "functions": [ - { - "name": "_average_binary_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True binary labels in binary label indicators." - }, - { - "name": "y_score", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates of the positive class, confidence values, or binary decisions." 
- }, - { - "name": "average", - "type": "str", - "hasDefault": true, - "default": "'macro'", - "limitation": null, - "ignored": false, - "docstring": "If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'micro'``: Calculate metrics globally by considering each element of the label indicator matrix as a label. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). ``'samples'``: Calculate metrics for each instance, and find their average. Will be ignored when ``y_true`` is binary." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "binary_metric", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The binary metric function to use." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Average a binary metric for multilabel classification.\n\nParameters\n----------\ny_true : array, shape = [n_samples] or [n_samples, n_classes]\n True binary labels in binary label indicators.\n\ny_score : array, shape = [n_samples] or [n_samples, n_classes]\n Target scores, can either be probability estimates of the positive\n class, confidence values, or binary decisions.\n\naverage : string, [None, 'micro', 'macro' (default), 'samples', 'weighted']\n If ``None``, the scores for each class are returned. 
Otherwise,\n this determines the type of averaging performed on the data:\n\n ``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n ``'samples'``:\n Calculate metrics for each instance, and find their average.\n\n Will be ignored when ``y_true`` is binary.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nbinary_metric : callable, returns shape [n_classes]\n The binary metric function to use.\n\nReturns\n-------\nscore : float or array of shape [n_classes]\n If not ``None``, average the score, else return the score for each\n classes." - }, - { - "name": "_average_multiclass_ovo_score", - "decorators": [], - "parameters": [ - { - "name": "binary_metric", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The binary metric function to use that accepts the following as input: y_true_target : array, shape = [n_samples_target] Some sub-array of y_true for a pair of classes designated positive and negative in the one-vs-one scheme. y_score_target : array, shape = [n_samples_target] Scores corresponding to the probability estimates of a sample belonging to the designated positive class label" - }, - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True multiclass labels." - }, - { - "name": "y_score", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores corresponding to probability estimates of a sample belonging to a particular class." 
- }, - { - "name": "average", - "type": "Literal['macro', 'weighted']", - "hasDefault": true, - "default": "'macro'", - "limitation": null, - "ignored": false, - "docstring": "Determines the type of averaging performed on the pairwise binary metric scores: ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. Classes are assumed to be uniformly distributed. ``'weighted'``: Calculate metrics for each label, taking into account the prevalence of the classes." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Average one-versus-one scores for multiclass classification.\n\nUses the binary metric for one-vs-one multiclass classification,\nwhere the score is computed according to the Hand & Till (2001) algorithm.\n\nParameters\n----------\nbinary_metric : callable\n The binary metric function to use that accepts the following as input:\n y_true_target : array, shape = [n_samples_target]\n Some sub-array of y_true for a pair of classes designated\n positive and negative in the one-vs-one scheme.\n y_score_target : array, shape = [n_samples_target]\n Scores corresponding to the probability estimates\n of a sample belonging to the designated positive class label\n\ny_true : array-like of shape (n_samples,)\n True multiclass labels.\n\ny_score : array-like of shape (n_samples, n_classes)\n Target scores corresponding to probability estimates of a sample\n belonging to a particular class.\n\naverage : {'macro', 'weighted'}, default='macro'\n Determines the type of averaging performed on the pairwise binary\n metric scores:\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account. 
Classes\n are assumed to be uniformly distributed.\n ``'weighted'``:\n Calculate metrics for each label, taking into account the\n prevalence of the classes.\n\nReturns\n-------\nscore : float\n Average of the pairwise binary metric scores." - }, - { - "name": "_check_pos_label_consistency", - "decorators": [], - "parameters": [ - { - "name": "pos_label", - "type": "Optional[Union[str, int]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The positive label." - }, - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target vector." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if `pos_label` need to be specified or not.\n\nIn binary classification, we fix `pos_label=1` if the labels are in the set\n{-1, 1} or {0, 1}. Otherwise, we raise an error asking to specify the\n`pos_label` parameters.\n\nParameters\n----------\npos_label : int, str or None\n The positive label.\ny_true : ndarray of shape (n_samples,)\n The target vector.\n\nReturns\n-------\npos_label : int\n If `pos_label` can be inferred, it will be returned.\n\nRaises\n------\nValueError\n In the case that `y_true` does not have label in {-1, 1} or {0, 1},\n it will raise a `ValueError`." 
- } - ] - }, - { - "name": "sklearn.metrics._classification", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy.sparse import coo_matrix", - "from scipy.sparse import csr_matrix", - "from preprocessing import LabelBinarizer", - "from preprocessing import LabelEncoder", - "from utils import assert_all_finite", - "from utils import check_array", - "from utils import check_consistent_length", - "from utils import column_or_1d", - "from utils.multiclass import unique_labels", - "from utils.multiclass import type_of_target", - "from utils.validation import _num_samples", - "from utils.validation import _deprecate_positional_args", - "from utils.sparsefuncs import count_nonzero", - "from exceptions import UndefinedMetricWarning", - "from _base import _check_pos_label_consistency" - ], - "classes": [], - "functions": [ - { - "name": "_check_zero_division", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_targets", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that y_true and y_pred belong to the same classification task.\n\nThis converts multiclass or binary types to a common shape, and raises a\nValueError for a mix of multilabel and multiclass targets, a mix of\nmultilabel formats, for the presence of continuous-valued or multioutput\ntargets, or for targets of different lengths.\n\nColumn vectors are squeezed to 1d, while multilabel formats are returned\nas CSR sparse label indicators.\n\nParameters\n----------\ny_true : array-like\n\ny_pred : array-like\n\nReturns\n-------\ntype_true : 
one of {'multilabel-indicator', 'multiclass', 'binary'}\n The type of the true target data, as output by\n ``utils.multiclass.type_of_target``.\n\ny_true : array or indicator matrix\n\ny_pred : array or indicator matrix" - }, - { - "name": "_weighted_sum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "accuracy_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) labels." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted labels, as returned by a classifier." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, return the number of correctly classified samples. Otherwise, return the fraction of correctly classified samples." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Accuracy classification score.\n\nIn multilabel classification, this function computes subset accuracy:\nthe set of labels predicted for a sample must *exactly* match the\ncorresponding set of labels in y_true.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) labels.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Predicted labels, as returned by a classifier.\n\nnormalize : bool, default=True\n If ``False``, return the number of correctly classified samples.\n Otherwise, return the fraction of correctly classified samples.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n If ``normalize == True``, return the fraction of correctly\n classified samples (float), else returns the number of correctly\n classified samples (int).\n\n The best performance is 1 with ``normalize == True`` and the number\n of samples with ``normalize == False``.\n\nSee Also\n--------\njaccard_score, hamming_loss, zero_one_loss\n\nNotes\n-----\nIn binary and multiclass classification, this function is equal\nto the ``jaccard_score`` function.\n\nExamples\n--------\n>>> from sklearn.metrics import accuracy_score\n>>> y_pred = [0, 2, 1, 3]\n>>> y_true = [0, 1, 2, 3]\n>>> accuracy_score(y_true, y_pred)\n0.5\n>>> accuracy_score(y_true, y_pred, normalize=False)\n2\n\nIn the multilabel case with binary label indicators:\n\n>>> import numpy as np\n>>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))\n0.5" - }, - { - "name": "confusion_matrix", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." 
- }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of labels to index the matrix. This may be used to reorder or select a subset of labels. If ``None`` is given, those that appear at least once in ``y_true`` or ``y_pred`` are used in sorted order." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. .. versionadded:: 0.18" - }, - { - "name": "normalize", - "type": "Literal['true', 'pred', 'all']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Normalizes confusion matrix over the true (rows), predicted (columns) conditions or all the population. If None, confusion matrix will not be normalized." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute confusion matrix to evaluate the accuracy of a classification.\n\nBy definition a confusion matrix :math:`C` is such that :math:`C_{i, j}`\nis equal to the number of observations known to be in group :math:`i` and\npredicted to be in group :math:`j`.\n\nThus in binary classification, the count of true negatives is\n:math:`C_{0,0}`, false negatives is :math:`C_{1,0}`, true positives is\n:math:`C_{1,1}` and false positives is :math:`C_{0,1}`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,)\n Estimated targets as returned by a classifier.\n\nlabels : array-like of shape (n_classes), default=None\n List of labels to index the matrix. 
This may be used to reorder\n or select a subset of labels.\n If ``None`` is given, those that appear at least once\n in ``y_true`` or ``y_pred`` are used in sorted order.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. versionadded:: 0.18\n\nnormalize : {'true', 'pred', 'all'}, default=None\n Normalizes confusion matrix over the true (rows), predicted (columns)\n conditions or all the population. If None, confusion matrix will not be\n normalized.\n\nReturns\n-------\nC : ndarray of shape (n_classes, n_classes)\n Confusion matrix whose i-th row and j-th\n column entry indicates the number of\n samples with true label being i-th class\n and predicted label being j-th class.\n\nSee Also\n--------\nplot_confusion_matrix : Plot Confusion Matrix.\nConfusionMatrixDisplay : Confusion Matrix visualization.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Confusion matrix\n `_\n (Wikipedia and other references may use a different\n convention for axes).\n\nExamples\n--------\n>>> from sklearn.metrics import confusion_matrix\n>>> y_true = [2, 0, 2, 2, 0, 1]\n>>> y_pred = [0, 0, 2, 2, 0, 2]\n>>> confusion_matrix(y_true, y_pred)\narray([[2, 0, 0],\n [0, 0, 1],\n [1, 0, 2]])\n\n>>> y_true = [\"cat\", \"ant\", \"cat\", \"cat\", \"ant\", \"bird\"]\n>>> y_pred = [\"ant\", \"ant\", \"cat\", \"cat\", \"ant\", \"cat\"]\n>>> confusion_matrix(y_true, y_pred, labels=[\"ant\", \"bird\", \"cat\"])\narray([[2, 0, 0],\n [0, 0, 1],\n [1, 0, 2]])\n\nIn the binary case, we can extract true positives, etc as follows:\n\n>>> tn, fp, fn, tp = confusion_matrix([0, 1, 0, 1], [1, 1, 1, 0]).ravel()\n>>> (tn, fp, fn, tp)\n(0, 2, 1, 1)" - }, - { - "name": "multilabel_confusion_matrix", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." 
- }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A list of classes or column indices to select some (or to force inclusion of classes absent from the data)." - }, - { - "name": "samplewise", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "In the multilabel case, this calculates a confusion matrix per sample." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute a confusion matrix for each class or sample.\n\n.. versionadded:: 0.21\n\nCompute class-wise (default) or sample-wise (samplewise=True) multilabel\nconfusion matrix to evaluate the accuracy of a classification, and output\nconfusion matrices for each class or sample.\n\nIn multilabel confusion matrix :math:`MCM`, the count of true negatives\nis :math:`MCM_{:,0,0}`, false negatives is :math:`MCM_{:,1,0}`,\ntrue positives is :math:`MCM_{:,1,1}` and false positives is\n:math:`MCM_{:,0,1}`.\n\nMulticlass data will be treated as if binarized under a one-vs-rest\ntransformation. 
Returned confusion matrices will be in the order of\nsorted unique labels in the union of (y_true, y_pred).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : {array-like, sparse matrix} of shape (n_samples, n_outputs) or (n_samples,)\n Ground truth (correct) target values.\n\ny_pred : {array-like, sparse matrix} of shape (n_samples, n_outputs) or (n_samples,)\n Estimated targets as returned by a classifier.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nlabels : array-like of shape (n_classes,), default=None\n A list of classes or column indices to select some (or to force\n inclusion of classes absent from the data).\n\nsamplewise : bool, default=False\n In the multilabel case, this calculates a confusion matrix per sample.\n\nReturns\n-------\nmulti_confusion : ndarray of shape (n_outputs, 2, 2)\n A 2x2 confusion matrix corresponding to each output in the input.\n When calculating class-wise multi_confusion (default), then\n n_outputs = n_labels; when calculating sample-wise multi_confusion\n (samplewise=True), n_outputs = n_samples. If ``labels`` is defined,\n the results will be returned in the order specified in ``labels``,\n otherwise the results will be returned in sorted order by default.\n\nSee Also\n--------\nconfusion_matrix\n\nNotes\n-----\nThe multilabel_confusion_matrix calculates class-wise or sample-wise\nmultilabel confusion matrices, and in multiclass tasks, labels are\nbinarized under a one-vs-rest way; while confusion_matrix calculates\none confusion matrix for confusion between every two classes.\n\nExamples\n--------\nMultilabel-indicator case:\n\n>>> import numpy as np\n>>> from sklearn.metrics import multilabel_confusion_matrix\n>>> y_true = np.array([[1, 0, 1],\n... [0, 1, 0]])\n>>> y_pred = np.array([[1, 0, 0],\n... 
[0, 1, 1]])\n>>> multilabel_confusion_matrix(y_true, y_pred)\narray([[[1, 0],\n [0, 1]],\n\n [[1, 0],\n [0, 1]],\n\n [[0, 1],\n [1, 0]]])\n\nMulticlass case:\n\n>>> y_true = [\"cat\", \"ant\", \"cat\", \"cat\", \"ant\", \"bird\"]\n>>> y_pred = [\"ant\", \"ant\", \"cat\", \"cat\", \"ant\", \"cat\"]\n>>> multilabel_confusion_matrix(y_true, y_pred,\n... labels=[\"ant\", \"bird\", \"cat\"])\narray([[[3, 1],\n [0, 2]],\n\n [[5, 0],\n [1, 0]],\n\n [[2, 1],\n [1, 2]]])" - }, - { - "name": "cohen_kappa_score", - "decorators": [], - "parameters": [ - { - "name": "y1", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Labels assigned by the first annotator." - }, - { - "name": "y2", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Labels assigned by the second annotator. The kappa statistic is symmetric, so swapping ``y1`` and ``y2`` doesn't change the value." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of labels to index the matrix. This may be used to select a subset of labels. If None, all labels that appear at least once in ``y1`` or ``y2`` are used." - }, - { - "name": "weights", - "type": "Literal['linear', 'quadratic']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weighting type to calculate the score. None means no weighted; \"linear\" means linear weighted; \"quadratic\" means quadratic weighted." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Cohen's kappa: a statistic that measures inter-annotator agreement.\n\nThis function computes Cohen's kappa [1]_, a score that expresses the level\nof agreement between two annotators on a classification problem. It is\ndefined as\n\n.. math::\n \\kappa = (p_o - p_e) / (1 - p_e)\n\nwhere :math:`p_o` is the empirical probability of agreement on the label\nassigned to any sample (the observed agreement ratio), and :math:`p_e` is\nthe expected agreement when both annotators assign labels randomly.\n:math:`p_e` is estimated using a per-annotator empirical prior over the\nclass labels [2]_.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny1 : array of shape (n_samples,)\n Labels assigned by the first annotator.\n\ny2 : array of shape (n_samples,)\n Labels assigned by the second annotator. The kappa statistic is\n symmetric, so swapping ``y1`` and ``y2`` doesn't change the value.\n\nlabels : array-like of shape (n_classes,), default=None\n List of labels to index the matrix. This may be used to select a\n subset of labels. If None, all labels that appear at least once in\n ``y1`` or ``y2`` are used.\n\nweights : {'linear', 'quadratic'}, default=None\n Weighting type to calculate the score. None means no weighted;\n \"linear\" means linear weighted; \"quadratic\" means quadratic weighted.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nkappa : float\n The kappa statistic, which is a number between -1 and 1. The maximum\n value means complete agreement; zero or lower means chance agreement.\n\nReferences\n----------\n.. [1] J. Cohen (1960). \"A coefficient of agreement for nominal scales\".\n Educational and Psychological Measurement 20(1):37-46.\n doi:10.1177/001316446002000104.\n.. [2] `R. Artstein and M. Poesio (2008). \"Inter-coder agreement for\n computational linguistics\". 
Computational Linguistics 34(4):555-596\n `_.\n.. [3] `Wikipedia entry for the Cohen's kappa\n `_." - }, - { - "name": "jaccard_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) labels." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted labels, as returned by a classifier." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only." - }, - { - "name": "average", - "type": "Optional[Literal['micro', 'macro', 'samples', 'weighted', 'binary']]", - "hasDefault": true, - "default": "'binary'", - "limitation": null, - "ignored": false, - "docstring": "If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'binary'``: Only report results for the class specified by ``pos_label``. 
This is applicable only if targets (``y_{true,pred}``) are binary. ``'micro'``: Calculate metrics globally by counting the total true positives, false negatives and false positives. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance. ``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "zero_division", - "type": "Literal[0.0, 1.0]", - "hasDefault": true, - "default": "\"warn\"", - "limitation": null, - "ignored": false, - "docstring": "Sets the value to return when there is a zero division, i.e. when there there are no negative values in predictions and labels. If set to \"warn\", this acts like 0, but a warning is also raised." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Jaccard similarity coefficient score.\n\nThe Jaccard index [1], or Jaccard similarity coefficient, defined as\nthe size of the intersection divided by the size of the union of two label\nsets, is used to compare set of predicted labels for a sample to the\ncorresponding set of labels in ``y_true``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) labels.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Predicted labels, as returned by a classifier.\n\nlabels : array-like of shape (n_classes,), default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. 
Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {None, 'micro', 'macro', 'samples', 'weighted', 'binary'}, default='binary'\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", {0.0, 1.0}, default=\"warn\"\n Sets the value to return when there is a zero division, i.e. when there\n there are no negative values in predictions and labels. 
If set to\n \"warn\", this acts like 0, but a warning is also raised.\n\nReturns\n-------\nscore : float (if average is not None) or array of floats, shape = [n_unique_labels]\n\nSee Also\n--------\naccuracy_score, f_score, multilabel_confusion_matrix\n\nNotes\n-----\n:func:`jaccard_score` may be a poor metric if there are no\npositives for some samples or classes. Jaccard is undefined if there are\nno true or predicted labels, and our implementation will return a score\nof 0 with a warning.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Jaccard index\n `_.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import jaccard_score\n>>> y_true = np.array([[0, 1, 1],\n... [1, 1, 0]])\n>>> y_pred = np.array([[1, 1, 1],\n... [1, 0, 0]])\n\nIn the binary case:\n\n>>> jaccard_score(y_true[0], y_pred[0])\n0.6666...\n\nIn the multilabel case:\n\n>>> jaccard_score(y_true, y_pred, average='samples')\n0.5833...\n>>> jaccard_score(y_true, y_pred, average='macro')\n0.6666...\n>>> jaccard_score(y_true, y_pred, average=None)\narray([0.5, 0.5, 1. ])\n\nIn the multiclass case:\n\n>>> y_pred = [0, 2, 1, 2]\n>>> y_true = [0, 1, 2, 2]\n>>> jaccard_score(y_true, y_pred, average=None)\narray([1. , 0. , 0.33...])" - }, - { - "name": "matthews_corrcoef", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. .. 
versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Matthews correlation coefficient (MCC).\n\nThe Matthews correlation coefficient is used in machine learning as a\nmeasure of the quality of binary and multiclass classifications. It takes\ninto account true and false positives and negatives and is generally\nregarded as a balanced measure which can be used even if the classes are of\nvery different sizes. The MCC is in essence a correlation coefficient value\nbetween -1 and +1. A coefficient of +1 represents a perfect prediction, 0\nan average random prediction and -1 an inverse prediction. The statistic\nis also known as the phi coefficient. [source: Wikipedia]\n\nBinary and multiclass labels are supported. Only in the binary case does\nthis relate to information about true and false positives and negatives.\nSee references below.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array, shape = [n_samples]\n Ground truth (correct) target values.\n\ny_pred : array, shape = [n_samples]\n Estimated targets as returned by a classifier.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nmcc : float\n The Matthews correlation coefficient (+1 represents a perfect\n prediction, 0 an average random prediction and -1 and inverse\n prediction).\n\nReferences\n----------\n.. [1] `Baldi, Brunak, Chauvin, Andersen and Nielsen, (2000). Assessing the\n accuracy of prediction algorithms for classification: an overview\n `_.\n\n.. [2] `Wikipedia entry for the Matthews Correlation Coefficient\n `_.\n\n.. [3] `Gorodkin, (2004). Comparing two K-category assignments by a\n K-category correlation coefficient\n `_.\n\n.. [4] `Jurman, Riccadonna, Furlanello, (2012). 
A Comparison of MCC and CEN\n Error Measures in MultiClass Prediction\n `_.\n\nExamples\n--------\n>>> from sklearn.metrics import matthews_corrcoef\n>>> y_true = [+1, +1, +1, -1]\n>>> y_pred = [+1, -1, +1, +1]\n>>> matthews_corrcoef(y_true, y_pred)\n-0.33..." - }, - { - "name": "zero_one_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) labels." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted labels, as returned by a classifier." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, return the number of misclassifications. Otherwise, return the fraction of misclassifications." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Zero-one classification loss.\n\nIf normalize is ``True``, return the fraction of misclassifications\n(float), else it returns the number of misclassifications (int). 
The best\nperformance is 0.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) labels.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Predicted labels, as returned by a classifier.\n\nnormalize : bool, default=True\n If ``False``, return the number of misclassifications.\n Otherwise, return the fraction of misclassifications.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nloss : float or int,\n If ``normalize == True``, return the fraction of misclassifications\n (float), else it returns the number of misclassifications (int).\n\nNotes\n-----\nIn multilabel classification, the zero_one_loss function corresponds to\nthe subset zero-one loss: for each sample, the entire set of labels must be\ncorrectly predicted, otherwise the loss for that sample is equal to one.\n\nSee Also\n--------\naccuracy_score, hamming_loss, jaccard_score\n\nExamples\n--------\n>>> from sklearn.metrics import zero_one_loss\n>>> y_pred = [1, 2, 3, 4]\n>>> y_true = [2, 2, 3, 4]\n>>> zero_one_loss(y_true, y_pred)\n0.25\n>>> zero_one_loss(y_true, y_pred, normalize=False)\n1\n\nIn the multilabel case with binary label indicators:\n\n>>> import numpy as np\n>>> zero_one_loss(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))\n0.5" - }, - { - "name": "f1_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." 
- }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order. .. versionchanged:: 0.17 Parameter `labels` improved for multiclass problem." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only." - }, - { - "name": "average", - "type": "Literal['micro', 'macro', 'samples', 'weighted', 'binary']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'binary'``: Only report results for the class specified by ``pos_label``. This is applicable only if targets (``y_{true,pred}``) are binary. ``'micro'``: Calculate metrics globally by counting the total true positives, false negatives and false positives. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). 
This alters 'macro' to account for label imbalance; it can result in an F-score that is not between precision and recall. ``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "zero_division", - "type": "Literal[\"warn\", 0, 1]", - "hasDefault": true, - "default": "\"warn\"", - "limitation": null, - "ignored": false, - "docstring": "Sets the value to return when there is a zero division, i.e. when all predictions and labels are negative. If set to \"warn\", this acts as 0, but warnings are also raised." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the F1 score, also known as balanced F-score or F-measure.\n\nThe F1 score can be interpreted as a weighted average of the precision and\nrecall, where an F1 score reaches its best value at 1 and worst score at 0.\nThe relative contribution of precision and recall to the F1 score are\nequal. The formula for the F1 score is::\n\n F1 = 2 * (precision * recall) / (precision + recall)\n\nIn the multi-class and multi-label case, this is the average of\nthe F1 score of each class with weighting depending on the ``average``\nparameter.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nlabels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. 
Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n .. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {'micro', 'macro', 'samples','weighted', 'binary'} or None, default='binary'\n This parameter is required for multiclass/multilabel targets.\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). 
This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division, i.e. when all\n predictions and labels are negative. If set to \"warn\", this acts as 0,\n but warnings are also raised.\n\nReturns\n-------\nf1_score : float or array of float, shape = [n_unique_labels]\n F1 score of the positive class in binary classification or weighted\n average of the F1 scores of each class for the multiclass task.\n\nSee Also\n--------\nfbeta_score, precision_recall_fscore_support, jaccard_score,\nmultilabel_confusion_matrix\n\nReferences\n----------\n.. [1] `Wikipedia entry for the F1-score\n `_.\n\nExamples\n--------\n>>> from sklearn.metrics import f1_score\n>>> y_true = [0, 1, 2, 0, 1, 2]\n>>> y_pred = [0, 2, 1, 0, 0, 1]\n>>> f1_score(y_true, y_pred, average='macro')\n0.26...\n>>> f1_score(y_true, y_pred, average='micro')\n0.33...\n>>> f1_score(y_true, y_pred, average='weighted')\n0.26...\n>>> f1_score(y_true, y_pred, average=None)\narray([0.8, 0. , 0. ])\n>>> y_true = [0, 0, 0, 0, 0, 0]\n>>> y_pred = [0, 0, 0, 0, 0, 0]\n>>> f1_score(y_true, y_pred, zero_division=1)\n1.0...\n\nNotes\n-----\nWhen ``true positive + false positive == 0``, precision is undefined.\nWhen ``true positive + false negative == 0``, recall is undefined.\nIn such cases, by default the metric will be set to 0, as will f-score,\nand ``UndefinedMetricWarning`` will be raised. This behavior can be\nmodified with ``zero_division``." 
- }, - { - "name": "fbeta_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." - }, - { - "name": "beta", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the weight of recall in the combined score." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order. .. versionchanged:: 0.17 Parameter `labels` improved for multiclass problem." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only." - }, - { - "name": "average", - "type": "Literal['micro', 'macro', 'samples', 'weighted', 'binary']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This parameter is required for multiclass/multilabel targets. 
If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'binary'``: Only report results for the class specified by ``pos_label``. This is applicable only if targets (``y_{true,pred}``) are binary. ``'micro'``: Calculate metrics globally by counting the total true positives, false negatives and false positives. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance; it can result in an F-score that is not between precision and recall. ``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "zero_division", - "type": "Literal[\"warn\", 0, 1]", - "hasDefault": true, - "default": "\"warn\"", - "limitation": null, - "ignored": false, - "docstring": "Sets the value to return when there is a zero division, i.e. when all predictions and labels are negative. If set to \"warn\", this acts as 0, but warnings are also raised." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the F-beta score.\n\nThe F-beta score is the weighted harmonic mean of precision and recall,\nreaching its optimal value at 1 and its worst value at 0.\n\nThe `beta` parameter determines the weight of recall in the combined\nscore. 
``beta < 1`` lends more weight to precision, while ``beta > 1``\nfavors recall (``beta -> 0`` considers only precision, ``beta -> +inf``\nonly recall).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nbeta : float\n Determines the weight of recall in the combined score.\n\nlabels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n .. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {'micro', 'macro', 'samples', 'weighted', 'binary'} or None default='binary'\n This parameter is required for multiclass/multilabel targets.\n If ``None``, the scores for each class are returned. 
Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division, i.e. when all\n predictions and labels are negative. If set to \"warn\", this acts as 0,\n but warnings are also raised.\n\nReturns\n-------\nfbeta_score : float (if average is not None) or array of float, shape = [n_unique_labels]\n F-beta score of the positive class in binary classification or weighted\n average of the F-beta score of each class for the multiclass task.\n\nSee Also\n--------\nprecision_recall_fscore_support, multilabel_confusion_matrix\n\nNotes\n-----\nWhen ``true positive + false positive == 0`` or\n``true positive + false negative == 0``, f-score returns 0 and raises\n``UndefinedMetricWarning``. This behavior can be\nmodified with ``zero_division``.\n\nReferences\n----------\n.. [1] R. Baeza-Yates and B. Ribeiro-Neto (2011).\n Modern Information Retrieval. Addison Wesley, pp. 327-328.\n\n.. 
[2] `Wikipedia entry for the F1-score\n `_.\n\nExamples\n--------\n>>> from sklearn.metrics import fbeta_score\n>>> y_true = [0, 1, 2, 0, 1, 2]\n>>> y_pred = [0, 2, 1, 0, 0, 1]\n>>> fbeta_score(y_true, y_pred, average='macro', beta=0.5)\n0.23...\n>>> fbeta_score(y_true, y_pred, average='micro', beta=0.5)\n0.33...\n>>> fbeta_score(y_true, y_pred, average='weighted', beta=0.5)\n0.23...\n>>> fbeta_score(y_true, y_pred, average=None, beta=0.5)\narray([0.71..., 0. , 0. ])" - }, - { - "name": "_prf_divide", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Performs division and handles divide-by-zero.\n\nOn zero-division, sets the corresponding result elements equal to\n0 or 1 (according to ``zero_division``). Plus, if\n``zero_division != \"warn\"`` raises a warning.\n\nThe metric, modifier and average arguments are used only for determining\nan appropriate warning." - }, - { - "name": "_warn_prf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_set_wise_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validation associated with set-wise metrics.\n\nReturns identified labels." - }, - { - "name": "precision_recall_fscore_support", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." - }, - { - "name": "beta", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The strength of recall versus precision in the F-score." 
- }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only." - }, - { - "name": "average", - "type": "Literal['binary', 'micro', 'macro', 'samples', 'weighted']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'binary'``: Only report results for the class specified by ``pos_label``. This is applicable only if targets (``y_{true,pred}``) are binary. ``'micro'``: Calculate metrics globally by counting the total true positives, false negatives and false positives. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance; it can result in an F-score that is not between precision and recall. 
``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`)." - }, - { - "name": "warn_for", - "type": "Union[Set, Tuple[]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This determines which warnings will be made in the case that this function is being used to return only one of its metrics." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "zero_division", - "type": "Literal[\"warn\", 0, 1]", - "hasDefault": true, - "default": "\"warn\"", - "limitation": null, - "ignored": false, - "docstring": "Sets the value to return when there is a zero division: - recall: when there are no positive labels - precision: when there are no positive predictions - f-score: both If set to \"warn\", this acts as 0, but warnings are also raised." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute precision, recall, F-measure and support for each class.\n\nThe precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\ntrue positives and ``fp`` the number of false positives. The precision is\nintuitively the ability of the classifier not to label as positive a sample\nthat is negative.\n\nThe recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\ntrue positives and ``fn`` the number of false negatives. The recall is\nintuitively the ability of the classifier to find all the positive samples.\n\nThe F-beta score can be interpreted as a weighted harmonic mean of\nthe precision and recall, where an F-beta score reaches its best\nvalue at 1 and worst score at 0.\n\nThe F-beta score weights recall more than precision by a factor of\n``beta``. 
``beta == 1.0`` means recall and precision are equally important.\n\nThe support is the number of occurrences of each class in ``y_true``.\n\nIf ``pos_label is None`` and in binary classification, this function\nreturns the average precision, recall and F-measure if ``average``\nis one of ``'micro'``, ``'macro'``, ``'weighted'`` or ``'samples'``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nbeta : float, default=1.0\n The strength of recall versus precision in the F-score.\n\nlabels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {'binary', 'micro', 'macro', 'samples','weighted'}, default=None\n If ``None``, the scores for each class are returned. 
Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\nwarn_for : tuple or set, for internal use\n This determines which warnings will be made in the case that this\n function is being used to return only one of its metrics.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division:\n - recall: when there are no positive labels\n - precision: when there are no positive predictions\n - f-score: both\n\n If set to \"warn\", this acts as 0, but warnings are also raised.\n\nReturns\n-------\nprecision : float (if average is not None) or array of float, shape = [n_unique_labels]\n\nrecall : float (if average is not None) or array of float, , shape = [n_unique_labels]\n\nfbeta_score : float (if average is not None) or array of float, shape = [n_unique_labels]\n\nsupport : None (if average is not None) or array of int, shape = [n_unique_labels]\n The number of occurrences of each label in ``y_true``.\n\nNotes\n-----\nWhen ``true positive + false positive == 0``, precision is 
undefined.\nWhen ``true positive + false negative == 0``, recall is undefined.\nIn such cases, by default the metric will be set to 0, as will f-score,\nand ``UndefinedMetricWarning`` will be raised. This behavior can be\nmodified with ``zero_division``.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Precision and recall\n `_.\n\n.. [2] `Wikipedia entry for the F1-score\n `_.\n\n.. [3] `Discriminative Methods for Multi-labeled Classification Advances\n in Knowledge Discovery and Data Mining (2004), pp. 22-30 by Shantanu\n Godbole, Sunita Sarawagi\n `_.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import precision_recall_fscore_support\n>>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig'])\n>>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog'])\n>>> precision_recall_fscore_support(y_true, y_pred, average='macro')\n(0.22..., 0.33..., 0.26..., None)\n>>> precision_recall_fscore_support(y_true, y_pred, average='micro')\n(0.33..., 0.33..., 0.33..., None)\n>>> precision_recall_fscore_support(y_true, y_pred, average='weighted')\n(0.22..., 0.33..., 0.26..., None)\n\nIt is possible to compute per-label precisions, recalls, F1-scores and\nsupports instead of averaging:\n\n>>> precision_recall_fscore_support(y_true, y_pred, average=None,\n... labels=['pig', 'dog', 'cat'])\n(array([0. , 0. , 0.66...]),\n array([0., 0., 1.]), array([0. , 0. , 0.8]),\n array([2, 2, 2]))" - }, - { - "name": "precision_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." 
- }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order. .. versionchanged:: 0.17 Parameter `labels` improved for multiclass problem." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only." - }, - { - "name": "average", - "type": "Literal['micro', 'macro', 'samples', 'weighted', 'binary']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'binary'``: Only report results for the class specified by ``pos_label``. This is applicable only if targets (``y_{true,pred}``) are binary. ``'micro'``: Calculate metrics globally by counting the total true positives, false negatives and false positives. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). 
This alters 'macro' to account for label imbalance; it can result in an F-score that is not between precision and recall. ``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "zero_division", - "type": "Literal[\"warn\", 0, 1]", - "hasDefault": true, - "default": "\"warn\"", - "limitation": null, - "ignored": false, - "docstring": "Sets the value to return when there is a zero division. If set to \"warn\", this acts as 0, but warnings are also raised." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the precision.\n\nThe precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\ntrue positives and ``fp`` the number of false positives. The precision is\nintuitively the ability of the classifier not to label as positive a sample\nthat is negative.\n\nThe best value is 1 and the worst value is 0.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nlabels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n .. 
versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {'micro', 'macro', 'samples', 'weighted', 'binary'} default='binary'\n This parameter is required for multiclass/multilabel targets.\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division. 
If set to\n \"warn\", this acts as 0, but warnings are also raised.\n\nReturns\n-------\nprecision : float (if average is not None) or array of float of shape\n (n_unique_labels,)\n Precision of the positive class in binary classification or weighted\n average of the precision of each class for the multiclass task.\n\nSee Also\n--------\nprecision_recall_fscore_support, multilabel_confusion_matrix\n\nNotes\n-----\nWhen ``true positive + false positive == 0``, precision returns 0 and\nraises ``UndefinedMetricWarning``. This behavior can be\nmodified with ``zero_division``.\n\nExamples\n--------\n>>> from sklearn.metrics import precision_score\n>>> y_true = [0, 1, 2, 0, 1, 2]\n>>> y_pred = [0, 2, 1, 0, 0, 1]\n>>> precision_score(y_true, y_pred, average='macro')\n0.22...\n>>> precision_score(y_true, y_pred, average='micro')\n0.33...\n>>> precision_score(y_true, y_pred, average='weighted')\n0.22...\n>>> precision_score(y_true, y_pred, average=None)\narray([0.66..., 0. , 0. ])\n>>> y_pred = [0, 0, 0, 0, 0, 0]\n>>> precision_score(y_true, y_pred, average=None)\narray([0.33..., 0. , 0. ])\n>>> precision_score(y_true, y_pred, average=None, zero_division=1)\narray([0.33..., 1. , 1. ])" - }, - { - "name": "recall_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. 
Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order. .. versionchanged:: 0.17 Parameter `labels` improved for multiclass problem." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only." - }, - { - "name": "average", - "type": "Literal['micro', 'macro', 'samples', 'weighted', 'binary']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'binary'``: Only report results for the class specified by ``pos_label``. This is applicable only if targets (``y_{true,pred}``) are binary. ``'micro'``: Calculate metrics globally by counting the total true positives, false negatives and false positives. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). This alters 'macro' to account for label imbalance; it can result in an F-score that is not between precision and recall. 
``'samples'``: Calculate metrics for each instance, and find their average (only meaningful for multilabel classification where this differs from :func:`accuracy_score`)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "zero_division", - "type": "Literal[\"warn\", 0, 1]", - "hasDefault": true, - "default": "\"warn\"", - "limitation": null, - "ignored": false, - "docstring": "Sets the value to return when there is a zero division. If set to \"warn\", this acts as 0, but warnings are also raised." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the recall.\n\nThe recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\ntrue positives and ``fn`` the number of false negatives. The recall is\nintuitively the ability of the classifier to find all the positive samples.\n\nThe best value is 1 and the worst value is 0.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nlabels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n .. 
versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {'micro', 'macro', 'samples', 'weighted', 'binary'} default='binary'\n This parameter is required for multiclass/multilabel targets.\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division. 
If set to\n \"warn\", this acts as 0, but warnings are also raised.\n\nReturns\n-------\nrecall : float (if average is not None) or array of float of shape\n (n_unique_labels,)\n Recall of the positive class in binary classification or weighted\n average of the recall of each class for the multiclass task.\n\nSee Also\n--------\nprecision_recall_fscore_support, balanced_accuracy_score,\nmultilabel_confusion_matrix\n\nNotes\n-----\nWhen ``true positive + false negative == 0``, recall returns 0 and raises\n``UndefinedMetricWarning``. This behavior can be modified with\n``zero_division``.\n\nExamples\n--------\n>>> from sklearn.metrics import recall_score\n>>> y_true = [0, 1, 2, 0, 1, 2]\n>>> y_pred = [0, 2, 1, 0, 0, 1]\n>>> recall_score(y_true, y_pred, average='macro')\n0.33...\n>>> recall_score(y_true, y_pred, average='micro')\n0.33...\n>>> recall_score(y_true, y_pred, average='weighted')\n0.33...\n>>> recall_score(y_true, y_pred, average=None)\narray([1., 0., 0.])\n>>> y_true = [0, 0, 0, 0, 0, 0]\n>>> recall_score(y_true, y_pred, average=None)\narray([0.5, 0. , 0. ])\n>>> recall_score(y_true, y_pred, average=None, zero_division=1)\narray([0.5, 1. , 1. ])" - }, - { - "name": "balanced_accuracy_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- }, - { - "name": "adjusted", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When true, the result is adjusted for chance, so that random performance would score 0, and perfect performance scores 1." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the balanced accuracy.\n\nThe balanced accuracy in binary and multiclass classification problems to\ndeal with imbalanced datasets. It is defined as the average of recall\nobtained on each class.\n\nThe best value is 1 and the worst value is 0 when ``adjusted=False``.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\ny_true : 1d array-like\n Ground truth (correct) target values.\n\ny_pred : 1d array-like\n Estimated targets as returned by a classifier.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nadjusted : bool, default=False\n When true, the result is adjusted for chance, so that random\n performance would score 0, and perfect performance scores 1.\n\nReturns\n-------\nbalanced_accuracy : float\n\nSee Also\n--------\nrecall_score, roc_auc_score\n\nNotes\n-----\nSome literature promotes alternative definitions of balanced accuracy. Our\ndefinition is equivalent to :func:`accuracy_score` with class-balanced\nsample weights, and shares desirable properties with the binary case.\nSee the :ref:`User Guide `.\n\nReferences\n----------\n.. [1] Brodersen, K.H.; Ong, C.S.; Stephan, K.E.; Buhmann, J.M. (2010).\n The balanced accuracy and its posterior distribution.\n Proceedings of the 20th International Conference on Pattern\n Recognition, 3121-24.\n.. [2] John. D. 
Kelleher, Brian Mac Namee, Aoife D'Arcy, (2015).\n `Fundamentals of Machine Learning for Predictive Data Analytics:\n Algorithms, Worked Examples, and Case Studies\n `_.\n\nExamples\n--------\n>>> from sklearn.metrics import balanced_accuracy_score\n>>> y_true = [0, 1, 0, 0, 1, 0]\n>>> y_pred = [0, 1, 0, 0, 0, 1]\n>>> balanced_accuracy_score(y_true, y_pred)\n0.625" - }, - { - "name": "classification_report", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated targets as returned by a classifier." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Optional list of label indices to include in the report." - }, - { - "name": "target_names", - "type": "List[str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Optional display names matching the labels (same order)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "digits", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of digits for formatting output floating point values. When ``output_dict`` is ``True``, this will be ignored and the returned values will not be rounded." - }, - { - "name": "output_dict", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, return output as dict. .. 
versionadded:: 0.20" - }, - { - "name": "zero_division", - "type": "Literal[\"warn\", 0, 1]", - "hasDefault": true, - "default": "\"warn\"", - "limitation": null, - "ignored": false, - "docstring": "Sets the value to return when there is a zero division. If set to \"warn\", this acts as 0, but warnings are also raised." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a text report showing the main classification metrics.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nlabels : array-like of shape (n_labels,), default=None\n Optional list of label indices to include in the report.\n\ntarget_names : list of str of shape (n_labels,), default=None\n Optional display names matching the labels (same order).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\ndigits : int, default=2\n Number of digits for formatting output floating point values.\n When ``output_dict`` is ``True``, this will be ignored and the\n returned values will not be rounded.\n\noutput_dict : bool, default=False\n If True, return output as dict.\n\n .. versionadded:: 0.20\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division. If set to\n \"warn\", this acts as 0, but warnings are also raised.\n\nReturns\n-------\nreport : string / dict\n Text summary of the precision, recall, F1 score for each class.\n Dictionary returned if output_dict is True. Dictionary has the\n following structure::\n\n {'label 1': {'precision':0.5,\n 'recall':1.0,\n 'f1-score':0.67,\n 'support':1},\n 'label 2': { ... 
},\n ...\n }\n\n The reported averages include macro average (averaging the unweighted\n mean per label), weighted average (averaging the support-weighted mean\n per label), and sample average (only for multilabel classification).\n Micro average (averaging the total true positives, false negatives and\n false positives) is only shown for multi-label or multi-class\n with a subset of classes, because it corresponds to accuracy\n otherwise and would be the same for all metrics.\n See also :func:`precision_recall_fscore_support` for more details\n on averages.\n\n Note that in binary classification, recall of the positive class\n is also known as \"sensitivity\"; recall of the negative class is\n \"specificity\".\n\nSee Also\n--------\nprecision_recall_fscore_support, confusion_matrix,\nmultilabel_confusion_matrix\n\nExamples\n--------\n>>> from sklearn.metrics import classification_report\n>>> y_true = [0, 1, 2, 2, 2]\n>>> y_pred = [0, 0, 2, 2, 1]\n>>> target_names = ['class 0', 'class 1', 'class 2']\n>>> print(classification_report(y_true, y_pred, target_names=target_names))\n precision recall f1-score support\n\n class 0 0.50 1.00 0.67 1\n class 1 0.00 0.00 0.00 1\n class 2 1.00 0.67 0.80 3\n\n accuracy 0.60 5\n macro avg 0.50 0.56 0.49 5\nweighted avg 0.70 0.60 0.61 5\n\n>>> y_pred = [1, 1, 0]\n>>> y_true = [1, 1, 1]\n>>> print(classification_report(y_true, y_pred, labels=[1, 2, 3]))\n precision recall f1-score support\n\n 1 1.00 0.67 0.80 3\n 2 0.00 0.00 0.00 0\n 3 0.00 0.00 0.00 0\n\n micro avg 1.00 0.67 0.80 3\n macro avg 0.33 0.22 0.27 3\nweighted avg 1.00 0.67 0.80 3\n" - }, - { - "name": "hamming_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) labels." 
- }, - { - "name": "y_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted labels, as returned by a classifier." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. .. versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the average Hamming loss.\n\nThe Hamming loss is the fraction of labels that are incorrectly predicted.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) labels.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Predicted labels, as returned by a classifier.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nloss : float or int\n Return the average Hamming loss between element of ``y_true`` and\n ``y_pred``.\n\nSee Also\n--------\naccuracy_score, jaccard_score, zero_one_loss\n\nNotes\n-----\nIn multiclass classification, the Hamming loss corresponds to the Hamming\ndistance between ``y_true`` and ``y_pred`` which is equivalent to the\nsubset ``zero_one_loss`` function, when `normalize` parameter is set to\nTrue.\n\nIn multilabel classification, the Hamming loss is different from the\nsubset zero-one loss. The zero-one loss considers the entire set of labels\nfor a given sample incorrect if it does not entirely match the true set of\nlabels. Hamming loss is more forgiving in that it penalizes only the\nindividual labels.\n\nThe Hamming loss is upperbounded by the subset zero-one loss, when\n`normalize` parameter is set to True. It is always between 0 and 1,\nlower being better.\n\nReferences\n----------\n.. [1] Grigorios Tsoumakas, Ioannis Katakis. Multi-Label Classification:\n An Overview. 
International Journal of Data Warehousing & Mining,\n 3(3), 1-13, July-September 2007.\n\n.. [2] `Wikipedia entry on the Hamming distance\n `_.\n\nExamples\n--------\n>>> from sklearn.metrics import hamming_loss\n>>> y_pred = [1, 2, 3, 4]\n>>> y_true = [2, 2, 3, 4]\n>>> hamming_loss(y_true, y_pred)\n0.25\n\nIn the multilabel case with binary label indicators:\n\n>>> import numpy as np\n>>> hamming_loss(np.array([[0, 1], [1, 1]]), np.zeros((2, 2)))\n0.75" - }, - { - "name": "log_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) labels for n_samples samples." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted probabilities, as returned by a classifier's predict_proba method. If ``y_pred.shape = (n_samples,)`` the probabilities provided are assumed to be that of the positive class. The labels in ``y_pred`` are assumed to be ordered alphabetically, as done by :class:`preprocessing.LabelBinarizer`." - }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "1e-15", - "limitation": null, - "ignored": false, - "docstring": "Log loss is undefined for p=0 or p=1, so probabilities are clipped to max(eps, min(1 - eps, p))." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If true, return the mean loss per sample. Otherwise, return the sum of the per-sample losses." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not provided, labels will be inferred from y_true. If ``labels`` is ``None`` and ``y_pred`` has shape (n_samples,) the labels are assumed to be binary and are inferred from ``y_true``. .. versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Log loss, aka logistic loss or cross-entropy loss.\n\nThis is the loss function used in (multinomial) logistic regression\nand extensions of it such as neural networks, defined as the negative\nlog-likelihood of a logistic model that returns ``y_pred`` probabilities\nfor its training data ``y_true``.\nThe log loss is only defined for two or more labels.\nFor a single sample with true label :math:`y \\in \\{0,1\\}` and\nand a probability estimate :math:`p = \\operatorname{Pr}(y = 1)`, the log\nloss is:\n\n.. math::\n L_{\\log}(y, p) = -(y \\log (p) + (1 - y) \\log (1 - p))\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like or label indicator matrix\n Ground truth (correct) labels for n_samples samples.\n\ny_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)\n Predicted probabilities, as returned by a classifier's\n predict_proba method. If ``y_pred.shape = (n_samples,)``\n the probabilities provided are assumed to be that of the\n positive class. 
The labels in ``y_pred`` are assumed to be\n ordered alphabetically, as done by\n :class:`preprocessing.LabelBinarizer`.\n\neps : float, default=1e-15\n Log loss is undefined for p=0 or p=1, so probabilities are\n clipped to max(eps, min(1 - eps, p)).\n\nnormalize : bool, default=True\n If true, return the mean loss per sample.\n Otherwise, return the sum of the per-sample losses.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nlabels : array-like, default=None\n If not provided, labels will be inferred from y_true. If ``labels``\n is ``None`` and ``y_pred`` has shape (n_samples,) the labels are\n assumed to be binary and are inferred from ``y_true``.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nloss : float\n\nNotes\n-----\nThe logarithm used is the natural logarithm (base-e).\n\nExamples\n--------\n>>> from sklearn.metrics import log_loss\n>>> log_loss([\"spam\", \"ham\", \"ham\", \"spam\"],\n... [[.1, .9], [.9, .1], [.8, .2], [.35, .65]])\n0.21616...\n\nReferences\n----------\nC.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,\np. 209." - }, - { - "name": "hinge_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True target, consisting of integers of two values. The positive label must be greater than the negative label." - }, - { - "name": "pred_decision", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted decisions, as output by decision_function (floats)." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Contains all the labels for the problem. Used in multiclass hinge loss." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Average hinge loss (non-regularized).\n\nIn binary class case, assuming labels in y_true are encoded with +1 and -1,\nwhen a prediction mistake is made, ``margin = y_true * pred_decision`` is\nalways negative (since the signs disagree), implying ``1 - margin`` is\nalways greater than 1. The cumulated hinge loss is therefore an upper\nbound of the number of mistakes made by the classifier.\n\nIn multiclass case, the function expects that either all the labels are\nincluded in y_true or an optional labels argument is provided which\ncontains all the labels. The multilabel margin is calculated according\nto Crammer-Singer's method. As in the binary case, the cumulated hinge loss\nis an upper bound of the number of mistakes made by the classifier.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array of shape (n_samples,)\n True target, consisting of integers of two values. The positive label\n must be greater than the negative label.\n\npred_decision : array of shape (n_samples,) or (n_samples, n_classes)\n Predicted decisions, as output by decision_function (floats).\n\nlabels : array-like, default=None\n Contains all the labels for the problem. Used in multiclass hinge loss.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nloss : float\n\nReferences\n----------\n.. [1] `Wikipedia entry on the Hinge loss\n `_.\n\n.. [2] Koby Crammer, Yoram Singer. On the Algorithmic\n Implementation of Multiclass Kernel-based Vector\n Machines. Journal of Machine Learning Research 2,\n (2001), 265-292.\n\n.. [3] `L1 AND L2 Regularization for Multiclass Hinge Loss Models\n by Robert C. 
Moore, John DeNero\n `_.\n\nExamples\n--------\n>>> from sklearn import svm\n>>> from sklearn.metrics import hinge_loss\n>>> X = [[0], [1]]\n>>> y = [-1, 1]\n>>> est = svm.LinearSVC(random_state=0)\n>>> est.fit(X, y)\nLinearSVC(random_state=0)\n>>> pred_decision = est.decision_function([[-2], [3], [0.5]])\n>>> pred_decision\narray([-2.18..., 2.36..., 0.09...])\n>>> hinge_loss([-1, 1, 1], pred_decision)\n0.30...\n\nIn the multiclass case:\n\n>>> import numpy as np\n>>> X = np.array([[0], [1], [2], [3]])\n>>> Y = np.array([0, 1, 2, 3])\n>>> labels = np.array([0, 1, 2, 3])\n>>> est = svm.LinearSVC()\n>>> est.fit(X, Y)\nLinearSVC()\n>>> pred_decision = est.decision_function([[-1], [2], [3]])\n>>> y_true = [0, 2, 3]\n>>> hinge_loss(y_true, pred_decision, labels=labels)\n0.56..." - }, - { - "name": "brier_score_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True targets." - }, - { - "name": "y_prob", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Probabilities of the positive class." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "pos_label", - "type": "Union[str, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Label of the positive class. `pos_label` will be infered in the following manner: * if `y_true` in {-1, 1} or {0, 1}, `pos_label` defaults to 1; * else if `y_true` contains string, an error will be raised and `pos_label` should be explicitely specified; * otherwise, `pos_label` defaults to the greater label, i.e. `np.unique(y_true)[-1]`." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Brier score loss.\n\nThe smaller the Brier score loss, the better, hence the naming with \"loss\".\nThe Brier score measures the mean squared difference between the predicted\nprobability and the actual outcome. The Brier score always\ntakes on a value between zero and one, since this is the largest\npossible difference between a predicted probability (which must be\nbetween zero and one) and the actual outcome (which can take on values\nof only 0 and 1). It can be decomposed is the sum of refinement loss and\ncalibration loss.\n\nThe Brier score is appropriate for binary and categorical outcomes that\ncan be structured as true or false, but is inappropriate for ordinal\nvariables which can take on three or more values (this is because the\nBrier score assumes that all possible outcomes are equivalently\n\"distant\" from one another). Which label is considered to be the positive\nlabel is controlled via the parameter `pos_label`, which defaults to\nthe greater label unless `y_true` is all 0 or all -1, in which case\n`pos_label` defaults to 1.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array of shape (n_samples,)\n True targets.\n\ny_prob : array of shape (n_samples,)\n Probabilities of the positive class.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\npos_label : int or str, default=None\n Label of the positive class. `pos_label` will be infered in the\n following manner:\n\n * if `y_true` in {-1, 1} or {0, 1}, `pos_label` defaults to 1;\n * else if `y_true` contains string, an error will be raised and\n `pos_label` should be explicitely specified;\n * otherwise, `pos_label` defaults to the greater label,\n i.e. 
`np.unique(y_true)[-1]`.\n\nReturns\n-------\nscore : float\n Brier score loss.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import brier_score_loss\n>>> y_true = np.array([0, 1, 1, 0])\n>>> y_true_categorical = np.array([\"spam\", \"ham\", \"ham\", \"spam\"])\n>>> y_prob = np.array([0.1, 0.9, 0.8, 0.3])\n>>> brier_score_loss(y_true, y_prob)\n0.037...\n>>> brier_score_loss(y_true, 1-y_prob, pos_label=0)\n0.037...\n>>> brier_score_loss(y_true_categorical, y_prob, pos_label=\"ham\")\n0.037...\n>>> brier_score_loss(y_true, np.array(y_prob) > 0.5)\n0.0\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Brier score\n `_." - } - ] - }, - { - "name": "sklearn.metrics._ranking", - "imports": [ - "import warnings", - "from functools import partial", - "import numpy as np", - "from scipy.sparse import csr_matrix", - "from scipy.stats import rankdata", - "from utils import assert_all_finite", - "from utils import check_consistent_length", - "from utils import column_or_1d", - "from utils import check_array", - "from utils.multiclass import type_of_target", - "from utils.extmath import stable_cumsum", - "from utils.sparsefuncs import count_nonzero", - "from utils.validation import _deprecate_positional_args", - "from exceptions import UndefinedMetricWarning", - "from preprocessing import label_binarize", - "from utils._encode import _encode", - "from utils._encode import _unique", - "from _base import _average_binary_score", - "from _base import _average_multiclass_ovo_score", - "from _base import _check_pos_label_consistency" - ], - "classes": [], - "functions": [ - { - "name": "auc", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "x coordinates. These must be either monotonic increasing or monotonic decreasing." 
- }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "y coordinates." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Area Under the Curve (AUC) using the trapezoidal rule.\n\nThis is a general function, given points on a curve. For computing the\narea under the ROC-curve, see :func:`roc_auc_score`. For an alternative\nway to summarize a precision-recall curve, see\n:func:`average_precision_score`.\n\nParameters\n----------\nx : ndarray of shape (n,)\n x coordinates. These must be either monotonic increasing or monotonic\n decreasing.\ny : ndarray of shape, (n,)\n y coordinates.\n\nReturns\n-------\nauc : float\n\nSee Also\n--------\nroc_auc_score : Compute the area under the ROC curve.\naverage_precision_score : Compute average precision from prediction scores.\nprecision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import metrics\n>>> y = np.array([1, 1, 2, 2])\n>>> pred = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, tpr, thresholds = metrics.roc_curve(y, pred, pos_label=2)\n>>> metrics.auc(fpr, tpr)\n0.75" - }, - { - "name": "average_precision_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True binary labels or binary label indicators." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates of the positive class, confidence values, or non-thresholded measure of decisions (as returned by :term:`decision_function` on some classifiers)." 
- }, - { - "name": "average", - "type": "Literal['micro', 'samples', 'weighted', 'macro']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: ``'micro'``: Calculate metrics globally by considering each element of the label indicator matrix as a label. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). ``'samples'``: Calculate metrics for each instance, and find their average. Will be ignored when ``y_true`` is binary." - }, - { - "name": "pos_label", - "type": "Union[str, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The label of the positive class. Only applied to binary ``y_true``. For multilabel-indicator ``y_true``, ``pos_label`` is fixed to 1." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute average precision (AP) from prediction scores.\n\nAP summarizes a precision-recall curve as the weighted mean of precisions\nachieved at each threshold, with the increase in recall from the previous\nthreshold used as the weight:\n\n.. math::\n \\text{AP} = \\sum_n (R_n - R_{n-1}) P_n\n\nwhere :math:`P_n` and :math:`R_n` are the precision and recall at the nth\nthreshold [1]_. 
This implementation is not interpolated and is different\nfrom computing the area under the precision-recall curve with the\ntrapezoidal rule, which uses linear interpolation and can be too\noptimistic.\n\nNote: this implementation is restricted to the binary classification task\nor multilabel classification task.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,) or (n_samples, n_classes)\n True binary labels or binary label indicators.\n\ny_score : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by :term:`decision_function` on some classifiers).\n\naverage : {'micro', 'samples', 'weighted', 'macro'} or None, default='macro'\n If ``None``, the scores for each class are returned. Otherwise,\n this determines the type of averaging performed on the data:\n\n ``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n ``'samples'``:\n Calculate metrics for each instance, and find their average.\n\n Will be ignored when ``y_true`` is binary.\n\npos_label : int or str, default=1\n The label of the positive class. 
Only applied to binary ``y_true``.\n For multilabel-indicator ``y_true``, ``pos_label`` is fixed to 1.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\naverage_precision : float\n\nSee Also\n--------\nroc_auc_score : Compute the area under the ROC curve.\nprecision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\n\nNotes\n-----\n.. versionchanged:: 0.19\n Instead of linearly interpolating between operating points, precisions\n are weighted by the change in recall since the last operating point.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Average precision\n `_\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import average_precision_score\n>>> y_true = np.array([0, 0, 1, 1])\n>>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])\n>>> average_precision_score(y_true, y_scores)\n0.83..." - }, - { - "name": "det_curve", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True binary labels. If labels are not either {-1, 1} or {0, 1}, then pos_label should be explicitly given." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates of the positive class, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." - }, - { - "name": "pos_label", - "type": "Union[str, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The label of the positive class. When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1}, ``pos_label`` is set to 1, otherwise an error will be raised." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute error rates for different probability thresholds.\n\n.. note::\n This metric is used for evaluation of ranking and error tradeoffs of\n a binary classification task.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,)\n True binary labels. If labels are not either {-1, 1} or {0, 1}, then\n pos_label should be explicitly given.\n\ny_score : ndarray of shape of (n_samples,)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\npos_label : int or str, default=None\n The label of the positive class.\n When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1},\n ``pos_label`` is set to 1, otherwise an error will be raised.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nfpr : ndarray of shape (n_thresholds,)\n False positive rate (FPR) such that element i is the false positive\n rate of predictions with score >= thresholds[i]. This is occasionally\n referred to as false acceptance propability or fall-out.\n\nfnr : ndarray of shape (n_thresholds,)\n False negative rate (FNR) such that element i is the false negative\n rate of predictions with score >= thresholds[i]. 
This is occasionally\n referred to as false rejection or miss rate.\n\nthresholds : ndarray of shape (n_thresholds,)\n Decreasing score values.\n\nSee Also\n--------\nplot_det_curve : Plot detection error tradeoff (DET) curve.\nDetCurveDisplay : DET curve visualization.\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nprecision_recall_curve : Compute precision-recall curve.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import det_curve\n>>> y_true = np.array([0, 0, 1, 1])\n>>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, fnr, thresholds = det_curve(y_true, y_scores)\n>>> fpr\narray([0.5, 0.5, 0. ])\n>>> fnr\narray([0. , 0.5, 0.5])\n>>> thresholds\narray([0.35, 0.4 , 0.8 ])" - }, - { - "name": "_binary_roc_auc_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Binary roc auc score." - }, - { - "name": "roc_auc_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels or binary label indicators. The binary and multiclass cases expect labels with shape (n_samples,) while the multilabel case expects binary label indicators with shape (n_samples, n_classes)." - }, - { - "name": "y_score", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores. * In the binary case, it corresponds to an array of shape `(n_samples,)`. Both probability estimates and non-thresholded decision values can be provided. The probability estimates correspond to the **probability of the class with the greater label**, i.e. `estimator.classes_[1]` and thus `estimator.predict_proba(X, y)[:, 1]`. The decision values corresponds to the output of `estimator.decision_function(X, y)`. 
See more information in the :ref:`User guide `; * In the multiclass case, it corresponds to an array of shape `(n_samples, n_classes)` of probability estimates provided by the `predict_proba` method. The probability estimates **must** sum to 1 across the possible classes. In addition, the order of the class scores must correspond to the order of ``labels``, if provided, or else to the numerical or lexicographical order of the labels in ``y_true``. See more information in the :ref:`User guide `; * In the multilabel case, it corresponds to an array of shape `(n_samples, n_classes)`. Probability estimates are provided by the `predict_proba` method and the non-thresholded decision values by the `decision_function` method. The probability estimates correspond to the **probability of the class with the greater label for each output** of the classifier. See more information in the :ref:`User guide `." - }, - { - "name": "average", - "type": "Literal['micro', 'macro', 'samples', 'weighted']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: Note: multiclass ROC AUC currently only handles the 'macro' and 'weighted' averages. ``'micro'``: Calculate metrics globally by considering each element of the label indicator matrix as a label. ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. ``'weighted'``: Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). ``'samples'``: Calculate metrics for each instance, and find their average. Will be ignored when ``y_true`` is binary." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- }, - { - "name": "max_fpr", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not ``None``, the standardized partial AUC [2]_ over the range [0, max_fpr] is returned. For the multiclass case, ``max_fpr``, should be either equal to ``None`` or ``1.0`` as AUC ROC partial computation currently is not supported for multiclass." - }, - { - "name": "multi_class", - "type": "Literal['raise', 'ovr', 'ovo']", - "hasDefault": true, - "default": "'raise'", - "limitation": null, - "ignored": false, - "docstring": "Only used for multiclass targets. Determines the type of configuration to use. The default value raises an error, so either ``'ovr'`` or ``'ovo'`` must be passed explicitly. ``'ovr'``: Stands for One-vs-rest. Computes the AUC of each class against the rest [3]_ [4]_. This treats the multiclass case in the same way as the multilabel case. Sensitive to class imbalance even when ``average == 'macro'``, because class imbalance affects the composition of each of the 'rest' groupings. ``'ovo'``: Stands for One-vs-one. Computes the average AUC of all possible pairwise combinations of classes [5]_. Insensitive to class imbalance when ``average == 'macro'``." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Only used for multiclass targets. List of labels that index the classes in ``y_score``. If ``None``, the numerical or lexicographical order of the labels in ``y_true`` is used." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)\nfrom prediction scores.\n\nNote: this implementation can be used with binary, multiclass and\nmultilabel classification, but some restrictions apply (see Parameters).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_classes)\n True labels or binary label indicators. The binary and multiclass cases\n expect labels with shape (n_samples,) while the multilabel case expects\n binary label indicators with shape (n_samples, n_classes).\n\ny_score : array-like of shape (n_samples,) or (n_samples, n_classes)\n Target scores.\n\n * In the binary case, it corresponds to an array of shape\n `(n_samples,)`. Both probability estimates and non-thresholded\n decision values can be provided. The probability estimates correspond\n to the **probability of the class with the greater label**,\n i.e. `estimator.classes_[1]` and thus\n `estimator.predict_proba(X, y)[:, 1]`. The decision values\n corresponds to the output of `estimator.decision_function(X, y)`.\n See more information in the :ref:`User guide `;\n * In the multiclass case, it corresponds to an array of shape\n `(n_samples, n_classes)` of probability estimates provided by the\n `predict_proba` method. The probability estimates **must**\n sum to 1 across the possible classes. In addition, the order of the\n class scores must correspond to the order of ``labels``,\n if provided, or else to the numerical or lexicographical order of\n the labels in ``y_true``. See more information in the\n :ref:`User guide `;\n * In the multilabel case, it corresponds to an array of shape\n `(n_samples, n_classes)`. Probability estimates are provided by the\n `predict_proba` method and the non-thresholded decision values by\n the `decision_function` method. 
The probability estimates correspond\n to the **probability of the class with the greater label for each\n output** of the classifier. See more information in the\n :ref:`User guide `.\n\naverage : {'micro', 'macro', 'samples', 'weighted'} or None, default='macro'\n If ``None``, the scores for each class are returned. Otherwise,\n this determines the type of averaging performed on the data:\n Note: multiclass ROC AUC currently only handles the 'macro' and\n 'weighted' averages.\n\n ``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n ``'samples'``:\n Calculate metrics for each instance, and find their average.\n\n Will be ignored when ``y_true`` is binary.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmax_fpr : float > 0 and <= 1, default=None\n If not ``None``, the standardized partial AUC [2]_ over the range\n [0, max_fpr] is returned. For the multiclass case, ``max_fpr``,\n should be either equal to ``None`` or ``1.0`` as AUC ROC partial\n computation currently is not supported for multiclass.\n\nmulti_class : {'raise', 'ovr', 'ovo'}, default='raise'\n Only used for multiclass targets. Determines the type of configuration\n to use. The default value raises an error, so either\n ``'ovr'`` or ``'ovo'`` must be passed explicitly.\n\n ``'ovr'``:\n Stands for One-vs-rest. Computes the AUC of each class\n against the rest [3]_ [4]_. This\n treats the multiclass case in the same way as the multilabel case.\n Sensitive to class imbalance even when ``average == 'macro'``,\n because class imbalance affects the composition of each of the\n 'rest' groupings.\n ``'ovo'``:\n Stands for One-vs-one. 
Computes the average AUC of all\n possible pairwise combinations of classes [5]_.\n Insensitive to class imbalance when\n ``average == 'macro'``.\n\nlabels : array-like of shape (n_classes,), default=None\n Only used for multiclass targets. List of labels that index the\n classes in ``y_score``. If ``None``, the numerical or lexicographical\n order of the labels in ``y_true`` is used.\n\nReturns\n-------\nauc : float\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Receiver operating characteristic\n `_\n\n.. [2] `Analyzing a portion of the ROC curve. McClish, 1989\n `_\n\n.. [3] Provost, F., Domingos, P. (2000). Well-trained PETs: Improving\n probability estimation trees (Section 6.2), CeDER Working Paper\n #IS-00-04, Stern School of Business, New York University.\n\n.. [4] `Fawcett, T. (2006). An introduction to ROC analysis. Pattern\n Recognition Letters, 27(8), 861-874.\n `_\n\n.. [5] `Hand, D.J., Till, R.J. (2001). A Simple Generalisation of the Area\n Under the ROC Curve for Multiple Class Classification Problems.\n Machine Learning, 45(2), 171-186.\n `_\n\nSee Also\n--------\naverage_precision_score : Area under the precision-recall curve.\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nplot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n\nExamples\n--------\nBinary case:\n\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.metrics import roc_auc_score\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> clf = LogisticRegression(solver=\"liblinear\", random_state=0).fit(X, y)\n>>> roc_auc_score(y, clf.predict_proba(X)[:, 1])\n0.99...\n>>> roc_auc_score(y, clf.decision_function(X))\n0.99...\n\nMulticlass case:\n\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = LogisticRegression(solver=\"liblinear\").fit(X, y)\n>>> roc_auc_score(y, clf.predict_proba(X), 
multi_class='ovr')\n0.99...\n\nMultilabel case:\n\n>>> from sklearn.datasets import make_multilabel_classification\n>>> from sklearn.multioutput import MultiOutputClassifier\n>>> X, y = make_multilabel_classification(random_state=0)\n>>> clf = MultiOutputClassifier(clf).fit(X, y)\n>>> # get a list of n_output containing probability arrays of shape\n>>> # (n_samples, n_classes)\n>>> y_pred = clf.predict_proba(X)\n>>> # extract the positive columns for each output\n>>> y_pred = np.transpose([pred[:, 1] for pred in y_pred])\n>>> roc_auc_score(y, y_pred, average=None)\narray([0.82..., 0.86..., 0.94..., 0.85... , 0.94...])\n>>> from sklearn.linear_model import RidgeClassifierCV\n>>> clf = RidgeClassifierCV().fit(X, y)\n>>> roc_auc_score(y, clf.decision_function(X), average=None)\narray([0.81..., 0.84... , 0.93..., 0.87..., 0.94...])" - }, - { - "name": "_multiclass_roc_auc_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True multiclass labels." - }, - { - "name": "y_score", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores corresponding to probability estimates of a sample belonging to a particular class" - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of labels to index ``y_score`` used for multiclass. If ``None``, the lexical order of ``y_true`` is used to index ``y_score``." - }, - { - "name": "multi_class", - "type": "Literal['ovr', 'ovo']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the type of multiclass configuration to use. ``'ovr'``: Calculate metrics for the multiclass case using the one-vs-rest approach. 
``'ovo'``: Calculate metrics for the multiclass case using the one-vs-one approach." - }, - { - "name": "average", - "type": "Literal['macro', 'weighted']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the type of averaging performed on the pairwise binary metric scores ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. Classes are assumed to be uniformly distributed. ``'weighted'``: Calculate metrics for each label, taking into account the prevalence of the classes." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Multiclass roc auc score.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n True multiclass labels.\n\ny_score : array-like of shape (n_samples, n_classes)\n Target scores corresponding to probability estimates of a sample\n belonging to a particular class\n\nlabels : array-like of shape (n_classes,) or None\n List of labels to index ``y_score`` used for multiclass. If ``None``,\n the lexical order of ``y_true`` is used to index ``y_score``.\n\nmulti_class : {'ovr', 'ovo'}\n Determines the type of multiclass configuration to use.\n ``'ovr'``:\n Calculate metrics for the multiclass case using the one-vs-rest\n approach.\n ``'ovo'``:\n Calculate metrics for the multiclass case using the one-vs-one\n approach.\n\naverage : {'macro', 'weighted'}\n Determines the type of averaging performed on the pairwise binary\n metric scores\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account. 
Classes\n are assumed to be uniformly distributed.\n ``'weighted'``:\n Calculate metrics for each label, taking into account the\n prevalence of the classes.\n\nsample_weight : array-like of shape (n_samples,) or None\n Sample weights." - }, - { - "name": "_binary_clf_curve", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True targets of binary classification." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated probabilities or output of a decision function." - }, - { - "name": "pos_label", - "type": "Union[str, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The label of the positive class." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate true and false positives per binary classification threshold.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,)\n True targets of binary classification.\n\ny_score : ndarray of shape (n_samples,)\n Estimated probabilities or output of a decision function.\n\npos_label : int or str, default=None\n The label of the positive class.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nfps : ndarray of shape (n_thresholds,)\n A count of false positives, at index i being the number of negative\n samples assigned a score >= thresholds[i]. 
The total number of\n negative samples is equal to fps[-1] (thus true negatives are given by\n fps[-1] - fps).\n\ntps : ndarray of shape (n_thresholds,)\n An increasing count of true positives, at index i being the number\n of positive samples assigned a score >= thresholds[i]. The total\n number of positive samples is equal to tps[-1] (thus false negatives\n are given by tps[-1] - tps).\n\nthresholds : ndarray of shape (n_thresholds,)\n Decreasing score values." - }, - { - "name": "precision_recall_curve", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True binary labels. If labels are not either {-1, 1} or {0, 1}, then pos_label should be explicitly given." - }, - { - "name": "probas_pred", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated probabilities or output of a decision function." - }, - { - "name": "pos_label", - "type": "Union[str, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The label of the positive class. When ``pos_label=None``, if y_true is in {-1, 1} or {0, 1}, ``pos_label`` is set to 1, otherwise an error will be raised." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute precision-recall pairs for different probability thresholds.\n\nNote: this implementation is restricted to the binary classification task.\n\nThe precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\ntrue positives and ``fp`` the number of false positives. 
The precision is\nintuitively the ability of the classifier not to label as positive a sample\nthat is negative.\n\nThe recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\ntrue positives and ``fn`` the number of false negatives. The recall is\nintuitively the ability of the classifier to find all the positive samples.\n\nThe last precision and recall values are 1. and 0. respectively and do not\nhave a corresponding threshold. This ensures that the graph starts on the\ny axis.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,)\n True binary labels. If labels are not either {-1, 1} or {0, 1}, then\n pos_label should be explicitly given.\n\nprobas_pred : ndarray of shape (n_samples,)\n Estimated probabilities or output of a decision function.\n\npos_label : int or str, default=None\n The label of the positive class.\n When ``pos_label=None``, if y_true is in {-1, 1} or {0, 1},\n ``pos_label`` is set to 1, otherwise an error will be raised.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nprecision : ndarray of shape (n_thresholds + 1,)\n Precision values such that element i is the precision of\n predictions with score >= thresholds[i] and the last element is 1.\n\nrecall : ndarray of shape (n_thresholds + 1,)\n Decreasing recall values such that element i is the recall of\n predictions with score >= thresholds[i] and the last element is 0.\n\nthresholds : ndarray of shape (n_thresholds,)\n Increasing thresholds on the decision function used to compute\n precision and recall. 
n_thresholds <= len(np.unique(probas_pred)).\n\nSee Also\n--------\nplot_precision_recall_curve : Plot Precision Recall Curve for binary\n classifiers.\nPrecisionRecallDisplay : Precision Recall visualization.\naverage_precision_score : Compute average precision from prediction scores.\ndet_curve: Compute error rates for different probability thresholds.\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import precision_recall_curve\n>>> y_true = np.array([0, 0, 1, 1])\n>>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])\n>>> precision, recall, thresholds = precision_recall_curve(\n... y_true, y_scores)\n>>> precision\narray([0.66666667, 0.5 , 1. , 1. ])\n>>> recall\narray([1. , 0.5, 0.5, 0. ])\n>>> thresholds\narray([0.35, 0.4 , 0.8 ])" - }, - { - "name": "roc_curve", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True binary labels. If labels are not either {-1, 1} or {0, 1}, then pos_label should be explicitly given." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates of the positive class, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." - }, - { - "name": "pos_label", - "type": "Union[str, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The label of the positive class. When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1}, ``pos_label`` is set to 1, otherwise an error will be raised." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "drop_intermediate", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to drop some suboptimal thresholds which would not appear on a plotted ROC curve. This is useful in order to create lighter ROC curves. .. versionadded:: 0.17 parameter *drop_intermediate*." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Receiver operating characteristic (ROC).\n\nNote: this implementation is restricted to the binary classification task.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,)\n True binary labels. If labels are not either {-1, 1} or {0, 1}, then\n pos_label should be explicitly given.\n\ny_score : ndarray of shape (n_samples,)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\npos_label : int or str, default=None\n The label of the positive class.\n When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1},\n ``pos_label`` is set to 1, otherwise an error will be raised.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\ndrop_intermediate : bool, default=True\n Whether to drop some suboptimal thresholds which would not appear\n on a plotted ROC curve. This is useful in order to create lighter\n ROC curves.\n\n .. 
versionadded:: 0.17\n parameter *drop_intermediate*.\n\nReturns\n-------\nfpr : ndarray of shape (>2,)\n Increasing false positive rates such that element i is the false\n positive rate of predictions with score >= `thresholds[i]`.\n\ntpr : ndarray of shape (>2,)\n Increasing true positive rates such that element `i` is the true\n positive rate of predictions with score >= `thresholds[i]`.\n\nthresholds : ndarray of shape = (n_thresholds,)\n Decreasing thresholds on the decision function used to compute\n fpr and tpr. `thresholds[0]` represents no instances being predicted\n and is arbitrarily set to `max(y_score) + 1`.\n\nSee Also\n--------\nplot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\nRocCurveDisplay : ROC Curve visualization.\ndet_curve: Compute error rates for different probability thresholds.\nroc_auc_score : Compute the area under the ROC curve.\n\nNotes\n-----\nSince the thresholds are sorted from low to high values, they\nare reversed upon returning them to ensure they correspond to both ``fpr``\nand ``tpr``, which are sorted in reversed order during their calculation.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Receiver operating characteristic\n `_\n\n.. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition\n Letters, 2006, 27(8):861-874.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import metrics\n>>> y = np.array([1, 1, 2, 2])\n>>> scores = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, tpr, thresholds = metrics.roc_curve(y, scores, pos_label=2)\n>>> fpr\narray([0. , 0. , 0.5, 0.5, 1. ])\n>>> tpr\narray([0. , 0.5, 0.5, 1. , 1. ])\n>>> thresholds\narray([1.8 , 0.8 , 0.4 , 0.35, 0.1 ])" - }, - { - "name": "label_ranking_average_precision_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True binary labels in binary indicator format." 
- }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates of the positive class, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute ranking-based average precision.\n\nLabel ranking average precision (LRAP) is the average over each ground\ntruth label assigned to each sample, of the ratio of true vs. total\nlabels with lower score.\n\nThis metric is used in multilabel ranking problem, where the goal\nis to give better rank to the labels associated to each sample.\n\nThe obtained score is always strictly greater than 0 and\nthe best value is 1.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : {ndarray, sparse matrix} of shape (n_samples, n_labels)\n True binary labels in binary indicator format.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. versionadded:: 0.20\n\nReturns\n-------\nscore : float\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import label_ranking_average_precision_score\n>>> y_true = np.array([[1, 0, 0], [0, 0, 1]])\n>>> y_score = np.array([[0.75, 0.5, 1], [1, 0.2, 0.1]])\n>>> label_ranking_average_precision_score(y_true, y_score)\n0.416..." 
- }, - { - "name": "coverage_error", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True binary labels in binary indicator format." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates of the positive class, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Coverage error measure.\n\nCompute how far we need to go through the ranked scores to cover all\ntrue labels. The best value is equal to the average number\nof labels in ``y_true`` per sample.\n\nTies in ``y_scores`` are broken by giving maximal rank that would have\nbeen assigned to all tied values.\n\nNote: Our implementation's score is 1 greater than the one given in\nTsoumakas et al., 2010. This extends it to handle the degenerate case\nin which an instance has 0 true labels.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n True binary labels in binary indicator format.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\ncoverage_error : float\n\nReferences\n----------\n.. [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. 
(2010).\n Mining multi-label data. In Data mining and knowledge discovery\n handbook (pp. 667-685). Springer US." - }, - { - "name": "label_ranking_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True binary labels in binary indicator format." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates of the positive class, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Ranking loss measure.\n\nCompute the average number of label pairs that are incorrectly ordered\ngiven y_score weighted by the size of the label set and the number of\nlabels not in the label set.\n\nThis is similar to the error set size, but weighted by the number of\nrelevant and irrelevant labels. The best performance is achieved with\na ranking loss of zero.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n A function *label_ranking_loss*\n\nParameters\n----------\ny_true : {ndarray, sparse matrix} of shape (n_samples, n_labels)\n True binary labels in binary indicator format.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nloss : float\n\nReferences\n----------\n.. 
[1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010).\n Mining multi-label data. In Data mining and knowledge discovery\n handbook (pp. 667-685). Springer US." - }, - { - "name": "_dcg_sample_scores", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True targets of multilabel classification, or true scores of entities to be ranked." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." - }, - { - "name": "k", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Only consider the highest k scores in the ranking. If None, use all outputs." - }, - { - "name": "log_base", - "type": "float", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Base of the logarithm used for the discount. A low value means a sharper discount (top results are more important)." - }, - { - "name": "ignore_ties", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Assume that there are no ties in y_score (which is likely to be the case if y_score is continuous) for efficiency gains." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n True targets of multilabel classification, or true scores of entities\n to be ranked.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates, confidence values,\n or non-thresholded measure of decisions (as returned by\n \"decision_function\" on some classifiers).\n\nk : int, default=None\n Only consider the highest k scores in the ranking. If None, use all\n outputs.\n\nlog_base : float, default=2\n Base of the logarithm used for the discount. A low value means a\n sharper discount (top results are more important).\n\nignore_ties : bool, default=False\n Assume that there are no ties in y_score (which is likely to be the\n case if y_score is continuous) for efficiency gains.\n\nReturns\n-------\ndiscounted_cumulative_gain : ndarray of shape (n_samples,)\n The DCG score for each sample.\n\nSee Also\n--------\nndcg_score : The Discounted Cumulative Gain divided by the Ideal Discounted\n Cumulative Gain (the DCG obtained for a perfect ranking), in order to\n have a score between 0 and 1." - }, - { - "name": "_tie_averaged_dcg", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The true relevance scores." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted scores." 
- }, - { - "name": "discount_cumsum", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed cumulative sum of the discounts." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute DCG by averaging over possible permutations of ties.\n\nThe gain (`y_true`) of an index falling inside a tied group (in the order\ninduced by `y_score`) is replaced by the average gain within this group.\nThe discounted gain for a tied group is then the average `y_true` within\nthis group times the sum of discounts of the corresponding ranks.\n\nThis amounts to averaging scores for all possible orderings of the tied\ngroups.\n\n(note in the case of dcg@k the discount is 0 after index k)\n\nParameters\n----------\ny_true : ndarray\n The true relevance scores.\n\ny_score : ndarray\n Predicted scores.\n\ndiscount_cumsum : ndarray\n Precomputed cumulative sum of the discounts.\n\nReturns\n-------\ndiscounted_cumulative_gain : float\n The discounted cumulative gain.\n\nReferences\n----------\nMcSherry, F., & Najork, M. (2008, March). Computing information retrieval\nperformance measures efficiently in the presence of tied scores. In\nEuropean conference on information retrieval (pp. 414-421). Springer,\nBerlin, Heidelberg." - }, - { - "name": "_check_dcg_target_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "dcg_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True targets of multilabel classification, or true scores of entities to be ranked." 
- }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." - }, - { - "name": "k", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Only consider the highest k scores in the ranking. If None, use all outputs." - }, - { - "name": "log_base", - "type": "float", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Base of the logarithm used for the discount. A low value means a sharper discount (top results are more important)." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, all samples are given the same weight." - }, - { - "name": "ignore_ties", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Assume that there are no ties in y_score (which is likely to be the case if y_score is continuous) for efficiency gains." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.\n\nUsually the Normalized Discounted Cumulative Gain (NDCG, computed by\nndcg_score) is preferred.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n True targets of multilabel classification, or true scores of entities\n to be ranked.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates, confidence values,\n or non-thresholded measure of decisions (as returned by\n \"decision_function\" on some classifiers).\n\nk : int, default=None\n Only consider the highest k scores in the ranking. If None, use all\n outputs.\n\nlog_base : float, default=2\n Base of the logarithm used for the discount. A low value means a\n sharper discount (top results are more important).\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights. If None, all samples are given the same weight.\n\nignore_ties : bool, default=False\n Assume that there are no ties in y_score (which is likely to be the\n case if y_score is continuous) for efficiency gains.\n\nReturns\n-------\ndiscounted_cumulative_gain : float\n The averaged sample DCG scores.\n\nSee Also\n--------\nndcg_score : The Discounted Cumulative Gain divided by the Ideal Discounted\n Cumulative Gain (the DCG obtained for a perfect ranking), in order to\n have a score between 0 and 1.\n\nReferences\n----------\n`Wikipedia entry for Discounted Cumulative Gain\n`_.\n\nJarvelin, K., & Kekalainen, J. (2002).\nCumulated gain-based evaluation of IR techniques. ACM Transactions on\nInformation Systems (TOIS), 20(4), 422-446.\n\nWang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. 
(2013, May).\nA theoretical analysis of NDCG ranking measures. In Proceedings of the 26th\nAnnual Conference on Learning Theory (COLT 2013).\n\nMcSherry, F., & Najork, M. (2008, March). Computing information retrieval\nperformance measures efficiently in the presence of tied scores. In\nEuropean conference on information retrieval (pp. 414-421). Springer,\nBerlin, Heidelberg.\n\nExamples\n--------\n>>> from sklearn.metrics import dcg_score\n>>> # we have groud-truth relevance of some answers to a query:\n>>> true_relevance = np.asarray([[10, 0, 0, 1, 5]])\n>>> # we predict scores for the answers\n>>> scores = np.asarray([[.1, .2, .3, 4, 70]])\n>>> dcg_score(true_relevance, scores)\n9.49...\n>>> # we can set k to truncate the sum; only top k answers contribute\n>>> dcg_score(true_relevance, scores, k=2)\n5.63...\n>>> # now we have some ties in our prediction\n>>> scores = np.asarray([[1, 0, 0, 0, 1]])\n>>> # by default ties are averaged, so here we get the average true\n>>> # relevance of our top predictions: (10 + 5) / 2 = 7.5\n>>> dcg_score(true_relevance, scores, k=1)\n7.5\n>>> # we can choose to ignore ties for faster results, but only\n>>> # if we know there aren't ties in our scores, otherwise we get\n>>> # wrong results:\n>>> dcg_score(true_relevance,\n... scores, k=1, ignore_ties=True)\n5.0" - }, - { - "name": "_ndcg_sample_scores", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True targets of multilabel classification, or true scores of entities to be ranked." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." 
- }, - { - "name": "k", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Only consider the highest k scores in the ranking. If None, use all outputs." - }, - { - "name": "ignore_ties", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Assume that there are no ties in y_score (which is likely to be the case if y_score is continuous) for efficiency gains." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Normalized Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount. Then divide by the best possible\nscore (Ideal DCG, obtained for a perfect ranking) to obtain a score between\n0 and 1.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n True targets of multilabel classification, or true scores of entities\n to be ranked.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates, confidence values,\n or non-thresholded measure of decisions (as returned by\n \"decision_function\" on some classifiers).\n\nk : int, default=None\n Only consider the highest k scores in the ranking. If None, use all\n outputs.\n\nignore_ties : bool, default=False\n Assume that there are no ties in y_score (which is likely to be the\n case if y_score is continuous) for efficiency gains.\n\nReturns\n-------\nnormalized_discounted_cumulative_gain : ndarray of shape (n_samples,)\n The NDCG score for each sample (float in [0., 1.]).\n\nSee Also\n--------\ndcg_score : Discounted Cumulative Gain (not normalized)." 
- }, - { - "name": "ndcg_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True targets of multilabel classification, or true scores of entities to be ranked." - }, - { - "name": "y_score", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores, can either be probability estimates, confidence values, or non-thresholded measure of decisions (as returned by \"decision_function\" on some classifiers)." - }, - { - "name": "k", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Only consider the highest k scores in the ranking. If None, use all outputs." - }, - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, all samples are given the same weight." - }, - { - "name": "ignore_ties", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Assume that there are no ties in y_score (which is likely to be the case if y_score is continuous) for efficiency gains." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute Normalized Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount. 
Then divide by the best possible\nscore (Ideal DCG, obtained for a perfect ranking) to obtain a score between\n0 and 1.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n True targets of multilabel classification, or true scores of entities\n to be ranked.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates, confidence values,\n or non-thresholded measure of decisions (as returned by\n \"decision_function\" on some classifiers).\n\nk : int, default=None\n Only consider the highest k scores in the ranking. If None, use all\n outputs.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights. If None, all samples are given the same weight.\n\nignore_ties : bool, default=False\n Assume that there are no ties in y_score (which is likely to be the\n case if y_score is continuous) for efficiency gains.\n\nReturns\n-------\nnormalized_discounted_cumulative_gain : float in [0., 1.]\n The averaged NDCG scores for all samples.\n\nSee Also\n--------\ndcg_score : Discounted Cumulative Gain (not normalized).\n\nReferences\n----------\n`Wikipedia entry for Discounted Cumulative Gain\n`_\n\nJarvelin, K., & Kekalainen, J. (2002).\nCumulated gain-based evaluation of IR techniques. ACM Transactions on\nInformation Systems (TOIS), 20(4), 422-446.\n\nWang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. (2013, May).\nA theoretical analysis of NDCG ranking measures. In Proceedings of the 26th\nAnnual Conference on Learning Theory (COLT 2013)\n\nMcSherry, F., & Najork, M. (2008, March). Computing information retrieval\nperformance measures efficiently in the presence of tied scores. In\nEuropean conference on information retrieval (pp. 414-421). 
Springer,\nBerlin, Heidelberg.\n\nExamples\n--------\n>>> from sklearn.metrics import ndcg_score\n>>> # we have groud-truth relevance of some answers to a query:\n>>> true_relevance = np.asarray([[10, 0, 0, 1, 5]])\n>>> # we predict some scores (relevance) for the answers\n>>> scores = np.asarray([[.1, .2, .3, 4, 70]])\n>>> ndcg_score(true_relevance, scores)\n0.69...\n>>> scores = np.asarray([[.05, 1.1, 1., .5, .0]])\n>>> ndcg_score(true_relevance, scores)\n0.49...\n>>> # we can set k to truncate the sum; only top k answers contribute.\n>>> ndcg_score(true_relevance, scores, k=4)\n0.35...\n>>> # the normalization takes k into account so a perfect answer\n>>> # would still get 1.0\n>>> ndcg_score(true_relevance, true_relevance, k=4)\n1.0\n>>> # now we have some ties in our prediction\n>>> scores = np.asarray([[1, 0, 0, 0, 1]])\n>>> # by default ties are averaged, so here we get the average (normalized)\n>>> # true relevance of our top predictions: (10 / 10 + 5 / 10) / 2 = .75\n>>> ndcg_score(true_relevance, scores, k=1)\n0.75\n>>> # we can choose to ignore ties for faster results, but only\n>>> # if we know there aren't ties in our scores, otherwise we get\n>>> # wrong results:\n>>> ndcg_score(true_relevance,\n... scores, k=1, ignore_ties=True)\n0.5" - }, - { - "name": "top_k_accuracy_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True labels." - }, - { - "name": "y_score", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target scores. These can be either probability estimates or non-thresholded decision values (as returned by :term:`decision_function` on some classifiers). The binary case expects scores with shape (n_samples,) while the multiclass case expects scores with shape (n_samples, n_classes). 
In the nulticlass case, the order of the class scores must correspond to the order of ``labels``, if provided, or else to the numerical or lexicographical order of the labels in ``y_true``." - }, - { - "name": "k", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of most likely outcomes considered to find the correct label." - }, - { - "name": "normalize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If `True`, return the fraction of correctly classified samples. Otherwise, return the number of correctly classified samples." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If `None`, all samples are given the same weight." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Multiclass only. List of labels that index the classes in ``y_score``. If ``None``, the numerical or lexicographical order of the labels in ``y_true`` is used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Top-k Accuracy classification score.\n\nThis metric computes the number of times where the correct label is among\nthe top `k` labels predicted (ranked by predicted scores). Note that the\nmultilabel case isn't covered here.\n\nRead more in the :ref:`User Guide `\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n True labels.\n\ny_score : array-like of shape (n_samples,) or (n_samples, n_classes)\n Target scores. These can be either probability estimates or\n non-thresholded decision values (as returned by\n :term:`decision_function` on some classifiers). 
The binary case expects\n scores with shape (n_samples,) while the multiclass case expects scores\n with shape (n_samples, n_classes). In the nulticlass case, the order of\n the class scores must correspond to the order of ``labels``, if\n provided, or else to the numerical or lexicographical order of the\n labels in ``y_true``.\n\nk : int, default=2\n Number of most likely outcomes considered to find the correct label.\n\nnormalize : bool, default=True\n If `True`, return the fraction of correctly classified samples.\n Otherwise, return the number of correctly classified samples.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If `None`, all samples are given the same weight.\n\nlabels : array-like of shape (n_classes,), default=None\n Multiclass only. List of labels that index the classes in ``y_score``.\n If ``None``, the numerical or lexicographical order of the labels in\n ``y_true`` is used.\n\nReturns\n-------\nscore : float\n The top-k accuracy score. The best performance is 1 with\n `normalize == True` and the number of samples with\n `normalize == False`.\n\nSee also\n--------\naccuracy_score\n\nNotes\n-----\nIn cases where two or more labels are assigned equal predicted scores,\nthe labels with the highest indices will be chosen first. This might\nimpact the result if the correct label falls after the threshold because\nof that.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import top_k_accuracy_score\n>>> y_true = np.array([0, 1, 2, 2])\n>>> y_score = np.array([[0.5, 0.2, 0.2], # 0 is in top 2\n... [0.3, 0.4, 0.2], # 1 is in top 2\n... [0.2, 0.4, 0.3], # 2 is in top 2\n... 
[0.7, 0.2, 0.1]]) # 2 isn't in top 2\n>>> top_k_accuracy_score(y_true, y_score, k=2)\n0.75\n>>> # Not normalizing gives the number of \"correctly\" classified samples\n>>> top_k_accuracy_score(y_true, y_score, k=2, normalize=False)\n3" - } - ] - }, - { - "name": "sklearn.metrics._regression", - "imports": [ - "import numpy as np", - "import warnings", - "from _loss.glm_distribution import TweedieDistribution", - "from utils.validation import check_array", - "from utils.validation import check_consistent_length", - "from utils.validation import _num_samples", - "from utils.validation import column_or_1d", - "from utils.validation import _deprecate_positional_args", - "from utils.validation import _check_sample_weight", - "from utils.stats import _weighted_percentile", - "from exceptions import UndefinedMetricWarning" - ], - "classes": [], - "functions": [ - { - "name": "_check_reg_targets", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "multioutput", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "'variance_weighted'] or None None is accepted due to backward compatibility of r2_score()." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that y_true and y_pred belong to the same regression task.\n\nParameters\n----------\ny_true : array-like\n\ny_pred : array-like\n\nmultioutput : array-like or string in ['raw_values', uniform_average',\n 'variance_weighted'] or None\n None is accepted due to backward compatibility of r2_score().\n\nReturns\n-------\ntype_true : one of {'continuous', continuous-multioutput'}\n The type of the true target data, as output by\n 'utils.multiclass.type_of_target'.\n\ny_true : array-like of shape (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples, n_outputs)\n Estimated target values.\n\nmultioutput : array-like of shape (n_outputs) or string in ['raw_values',\n uniform_average', 'variance_weighted'] or None\n Custom output weights if ``multioutput`` is array-like or\n just the corresponding argument if ``multioutput`` is a\n correct keyword.\n\ndtype : str or list, default=\"numeric\"\n the dtype argument passed to check_array." - }, - { - "name": "mean_absolute_error", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "multioutput", - "type": "Literal['raw_values', 'uniform_average']", - "hasDefault": true, - "default": "'uniform_average'", - "limitation": null, - "ignored": false, - "docstring": "Defines aggregating of multiple output values. 
Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mean absolute error regression loss.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average'} or array-like of shape (n_outputs,), default='uniform_average'\n Defines aggregating of multiple output values.\n Array-like value defines weights used to average errors.\n\n 'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\n\nReturns\n-------\nloss : float or ndarray of floats\n If multioutput is 'raw_values', then mean absolute error is returned\n for each output separately.\n If multioutput is 'uniform_average' or an ndarray of weights, then the\n weighted average of all output errors is returned.\n\n MAE output is non-negative floating point. The best value is 0.0.\n\nExamples\n--------\n>>> from sklearn.metrics import mean_absolute_error\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> mean_absolute_error(y_true, y_pred)\n0.5\n>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n>>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n>>> mean_absolute_error(y_true, y_pred)\n0.75\n>>> mean_absolute_error(y_true, y_pred, multioutput='raw_values')\narray([0.5, 1. ])\n>>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])\n0.85..." 
- }, - { - "name": "mean_absolute_percentage_error", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "multioutput", - "type": "Literal['raw_values', 'uniform_average']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Defines aggregating of multiple output values. Array-like value defines weights used to average errors. If input is list then the shape must be (n_outputs,). 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mean absolute percentage error regression loss.\n\nNote here that we do not represent the output as a percentage in range\n[0, 100]. Instead, we represent it in range [0, 1/eps]. Read more in the\n:ref:`User Guide `.\n\n.. 
versionadded:: 0.24\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average'} or array-like\n Defines aggregating of multiple output values.\n Array-like value defines weights used to average errors.\n If input is list then the shape must be (n_outputs,).\n\n 'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\nReturns\n-------\nloss : float or ndarray of floats in the range [0, 1/eps]\n If multioutput is 'raw_values', then mean absolute percentage error\n is returned for each output separately.\n If multioutput is 'uniform_average' or an ndarray of weights, then the\n weighted average of all output errors is returned.\n\n MAPE output is non-negative floating point. The best value is 0.0.\n But note the fact that bad predictions can lead to arbitarily large\n MAPE values, especially if some y_true values are very close to zero.\n Note that we return a large value instead of `inf` when y_true is zero.\n\nExamples\n--------\n>>> from sklearn.metrics import mean_absolute_percentage_error\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> mean_absolute_percentage_error(y_true, y_pred)\n0.3273...\n>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n>>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n>>> mean_absolute_percentage_error(y_true, y_pred)\n0.5515...\n>>> mean_absolute_percentage_error(y_true, y_pred, multioutput=[0.3, 0.7])\n0.6198..." 
- }, - { - "name": "mean_squared_error", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "multioutput", - "type": "Literal['raw_values', 'uniform_average']", - "hasDefault": true, - "default": "'uniform_average'", - "limitation": null, - "ignored": false, - "docstring": "Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight." - }, - { - "name": "squared", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True returns MSE value, if False returns RMSE value." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mean squared error regression loss.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average'} or array-like of shape (n_outputs,), default='uniform_average'\n Defines aggregating of multiple output values.\n Array-like value defines weights used to average errors.\n\n 'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\nsquared : bool, default=True\n If True returns MSE value, if False returns RMSE value.\n\nReturns\n-------\nloss : float or ndarray of floats\n A non-negative floating point value (the best value is 0.0), or an\n array of floating point values, one for each individual target.\n\nExamples\n--------\n>>> from sklearn.metrics import mean_squared_error\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> mean_squared_error(y_true, y_pred)\n0.375\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> mean_squared_error(y_true, y_pred, squared=False)\n0.612...\n>>> y_true = [[0.5, 1],[-1, 1],[7, -6]]\n>>> y_pred = [[0, 2],[-1, 2],[8, -5]]\n>>> mean_squared_error(y_true, y_pred)\n0.708...\n>>> mean_squared_error(y_true, y_pred, squared=False)\n0.822...\n>>> mean_squared_error(y_true, y_pred, multioutput='raw_values')\narray([0.41666667, 1. ])\n>>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7])\n0.825..." 
- }, - { - "name": "mean_squared_log_error", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "multioutput", - "type": "Literal['raw_values', 'uniform_average']", - "hasDefault": true, - "default": "'uniform_average'", - "limitation": null, - "ignored": false, - "docstring": "Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors when the input is of multioutput format. 'uniform_average' : Errors of all outputs are averaged with uniform weight." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mean squared logarithmic error regression loss.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average'} or array-like of shape (n_outputs,), default='uniform_average'\n\n Defines aggregating of multiple output values.\n Array-like value defines weights used to average errors.\n\n 'raw_values' :\n Returns a full set of errors when the input is of multioutput\n format.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\nReturns\n-------\nloss : float or ndarray of floats\n A non-negative floating point value (the best value is 0.0), or an\n array of floating point values, one for each individual target.\n\nExamples\n--------\n>>> from sklearn.metrics import mean_squared_log_error\n>>> y_true = [3, 5, 2.5, 7]\n>>> y_pred = [2.5, 5, 4, 8]\n>>> mean_squared_log_error(y_true, y_pred)\n0.039...\n>>> y_true = [[0.5, 1], [1, 2], [7, 6]]\n>>> y_pred = [[0.5, 2], [1, 2.5], [8, 8]]\n>>> mean_squared_log_error(y_true, y_pred)\n0.044...\n>>> mean_squared_log_error(y_true, y_pred, multioutput='raw_values')\narray([0.00462428, 0.08377444])\n>>> mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])\n0.060..." - }, - { - "name": "median_absolute_error", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." 
- }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - }, - { - "name": "multioutput", - "type": "Literal['raw_values', 'uniform_average']", - "hasDefault": true, - "default": "'uniform_average'", - "limitation": null, - "ignored": false, - "docstring": "Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Median absolute error regression loss.\n\nMedian absolute error output is non-negative floating point. The best value\nis 0.0. Read more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape = (n_samples) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)\n Estimated target values.\n\nmultioutput : {'raw_values', 'uniform_average'} or array-like of shape (n_outputs,), default='uniform_average'\n Defines aggregating of multiple output values. Array-like value defines\n weights used to average errors.\n\n 'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. 
versionadded:: 0.24\n\nReturns\n-------\nloss : float or ndarray of floats\n If multioutput is 'raw_values', then mean absolute error is returned\n for each output separately.\n If multioutput is 'uniform_average' or an ndarray of weights, then the\n weighted average of all output errors is returned.\n\nExamples\n--------\n>>> from sklearn.metrics import median_absolute_error\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> median_absolute_error(y_true, y_pred)\n0.5\n>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n>>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n>>> median_absolute_error(y_true, y_pred)\n0.75\n>>> median_absolute_error(y_true, y_pred, multioutput='raw_values')\narray([0.5, 1. ])\n>>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])\n0.85" - }, - { - "name": "explained_variance_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "multioutput", - "type": "Literal['raw_values', 'uniform_average', 'variance_weighted']", - "hasDefault": true, - "default": "'uniform_average'", - "limitation": null, - "ignored": false, - "docstring": "Defines aggregating of multiple output scores. Array-like value defines weights used to average scores. 'raw_values' : Returns a full set of scores in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. 
'variance_weighted' : Scores of all outputs are averaged, weighted by the variances of each individual output." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Explained variance regression score function.\n\nBest possible score is 1.0, lower values are worse.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average', 'variance_weighted'} or array-like of shape (n_outputs,), default='uniform_average'\n Defines aggregating of multiple output scores.\n Array-like value defines weights used to average scores.\n\n 'raw_values' :\n Returns a full set of scores in case of multioutput input.\n\n 'uniform_average' :\n Scores of all outputs are averaged with uniform weight.\n\n 'variance_weighted' :\n Scores of all outputs are averaged, weighted by the variances\n of each individual output.\n\nReturns\n-------\nscore : float or ndarray of floats\n The explained variance or ndarray if 'multioutput' is 'raw_values'.\n\nNotes\n-----\nThis is not a symmetric function.\n\nExamples\n--------\n>>> from sklearn.metrics import explained_variance_score\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> explained_variance_score(y_true, y_pred)\n0.957...\n>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n>>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n>>> explained_variance_score(y_true, y_pred, multioutput='uniform_average')\n0.983..." - }, - { - "name": "r2_score", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." 
- }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "multioutput", - "type": "Literal['raw_values', 'uniform_average', 'variance_weighted']", - "hasDefault": true, - "default": "'uniform_average'", - "limitation": null, - "ignored": false, - "docstring": "Defines aggregating of multiple output scores. Array-like value defines weights used to average scores. Default is \"uniform_average\". 'raw_values' : Returns a full set of scores in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. 'variance_weighted' : Scores of all outputs are averaged, weighted by the variances of each individual output. .. versionchanged:: 0.19 Default value of multioutput is 'uniform_average'." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "R^2 (coefficient of determination) regression score function.\n\nBest possible score is 1.0 and it can be negative (because the\nmodel can be arbitrarily worse). 
A constant model that always\npredicts the expected value of y, disregarding the input features,\nwould get a R^2 score of 0.0.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average', 'variance_weighted'}, array-like of shape (n_outputs,) or None, default='uniform_average'\n\n Defines aggregating of multiple output scores.\n Array-like value defines weights used to average scores.\n Default is \"uniform_average\".\n\n 'raw_values' :\n Returns a full set of scores in case of multioutput input.\n\n 'uniform_average' :\n Scores of all outputs are averaged with uniform weight.\n\n 'variance_weighted' :\n Scores of all outputs are averaged, weighted by the variances\n of each individual output.\n\n .. versionchanged:: 0.19\n Default value of multioutput is 'uniform_average'.\n\nReturns\n-------\nz : float or ndarray of floats\n The R^2 score or ndarray of scores if 'multioutput' is\n 'raw_values'.\n\nNotes\n-----\nThis is not a symmetric function.\n\nUnlike most other scores, R^2 score may be negative (it need not actually\nbe the square of a quantity R).\n\nThis metric is not well-defined for single samples and will return a NaN\nvalue if n_samples is less than two.\n\nReferences\n----------\n.. [1] `Wikipedia entry on the Coefficient of determination\n `_\n\nExamples\n--------\n>>> from sklearn.metrics import r2_score\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> r2_score(y_true, y_pred)\n0.948...\n>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n>>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n>>> r2_score(y_true, y_pred,\n... 
multioutput='variance_weighted')\n0.938...\n>>> y_true = [1, 2, 3]\n>>> y_pred = [1, 2, 3]\n>>> r2_score(y_true, y_pred)\n1.0\n>>> y_true = [1, 2, 3]\n>>> y_pred = [2, 2, 2]\n>>> r2_score(y_true, y_pred)\n0.0\n>>> y_true = [1, 2, 3]\n>>> y_pred = [3, 2, 1]\n>>> r2_score(y_true, y_pred)\n-3.0" - }, - { - "name": "max_error", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "max_error metric calculates the maximum residual error.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,)\n Estimated target values.\n\nReturns\n-------\nmax_error : float\n A positive floating point value (the best value is 0.0).\n\nExamples\n--------\n>>> from sklearn.metrics import max_error\n>>> y_true = [3, 2, 7, 1]\n>>> y_pred = [4, 2, 7, 1]\n>>> max_error(y_true, y_pred)\n1" - }, - { - "name": "mean_tweedie_deviance", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- }, - { - "name": "power", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Tweedie power parameter. Either power <= 0 or power >= 1. The higher `p` the less weight is given to extreme deviations between true and predicted targets. - power < 0: Extreme stable distribution. Requires: y_pred > 0. - power = 0 : Normal distribution, output corresponds to mean_squared_error. y_true and y_pred can be any real numbers. - power = 1 : Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0. - power = 3 : Inverse Gaussian distribution. Requires: y_true > 0 and y_pred > 0. - otherwise : Positive stable distribution. Requires: y_true > 0 and y_pred > 0." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mean Tweedie deviance regression loss.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\npower : float, default=0\n Tweedie power parameter. Either power <= 0 or power >= 1.\n\n The higher `p` the less weight is given to extreme\n deviations between true and predicted targets.\n\n - power < 0: Extreme stable distribution. Requires: y_pred > 0.\n - power = 0 : Normal distribution, output corresponds to\n mean_squared_error. y_true and y_pred can be any real numbers.\n - power = 1 : Poisson distribution. Requires: y_true >= 0 and\n y_pred > 0.\n - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0\n and y_pred > 0.\n - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.\n - power = 3 : Inverse Gaussian distribution. 
Requires: y_true > 0\n and y_pred > 0.\n - otherwise : Positive stable distribution. Requires: y_true > 0\n and y_pred > 0.\n\nReturns\n-------\nloss : float\n A non-negative floating point value (the best value is 0.0).\n\nExamples\n--------\n>>> from sklearn.metrics import mean_tweedie_deviance\n>>> y_true = [2, 0, 1, 4]\n>>> y_pred = [0.5, 0.5, 2., 2.]\n>>> mean_tweedie_deviance(y_true, y_pred, power=1)\n1.4260..." - }, - { - "name": "mean_poisson_deviance", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values. Requires y_true >= 0." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values. Requires y_pred > 0." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mean Poisson deviance regression loss.\n\nPoisson deviance is equivalent to the Tweedie deviance with\nthe power parameter `power=1`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n Ground truth (correct) target values. Requires y_true >= 0.\n\ny_pred : array-like of shape (n_samples,)\n Estimated target values. Requires y_pred > 0.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nloss : float\n A non-negative floating point value (the best value is 0.0).\n\nExamples\n--------\n>>> from sklearn.metrics import mean_poisson_deviance\n>>> y_true = [2, 0, 1, 4]\n>>> y_pred = [0.5, 0.5, 2., 2.]\n>>> mean_poisson_deviance(y_true, y_pred)\n1.4260..." 
- }, - { - "name": "mean_gamma_deviance", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) target values. Requires y_true > 0." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimated target values. Requires y_pred > 0." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mean Gamma deviance regression loss.\n\nGamma deviance is equivalent to the Tweedie deviance with\nthe power parameter `power=2`. It is invariant to scaling of\nthe target variable, and measures relative errors.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n Ground truth (correct) target values. Requires y_true > 0.\n\ny_pred : array-like of shape (n_samples,)\n Estimated target values. Requires y_pred > 0.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nloss : float\n A non-negative floating point value (the best value is 0.0).\n\nExamples\n--------\n>>> from sklearn.metrics import mean_gamma_deviance\n>>> y_true = [2, 0.5, 1, 4]\n>>> y_pred = [0.5, 0.5, 2., 2.]\n>>> mean_gamma_deviance(y_true, y_pred)\n1.0568..." 
- } - ] - }, - { - "name": "sklearn.metrics._scorer", - "imports": [ - "from collections.abc import Iterable", - "from functools import partial", - "from collections import Counter", - "import numpy as np", - "from None import r2_score", - "from None import median_absolute_error", - "from None import max_error", - "from None import mean_absolute_error", - "from None import mean_squared_error", - "from None import mean_squared_log_error", - "from None import mean_poisson_deviance", - "from None import mean_gamma_deviance", - "from None import accuracy_score", - "from None import top_k_accuracy_score", - "from None import f1_score", - "from None import roc_auc_score", - "from None import average_precision_score", - "from None import precision_score", - "from None import recall_score", - "from None import log_loss", - "from None import balanced_accuracy_score", - "from None import explained_variance_score", - "from None import brier_score_loss", - "from None import jaccard_score", - "from None import mean_absolute_percentage_error", - "from cluster import adjusted_rand_score", - "from cluster import rand_score", - "from cluster import homogeneity_score", - "from cluster import completeness_score", - "from cluster import v_measure_score", - "from cluster import mutual_info_score", - "from cluster import adjusted_mutual_info_score", - "from cluster import normalized_mutual_info_score", - "from cluster import fowlkes_mallows_score", - "from utils.multiclass import type_of_target", - "from utils.validation import _deprecate_positional_args", - "from base import is_regressor" - ], - "classes": [ - { - "name": "_MultimetricScorer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "scorers", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary mapping names to callable scorers." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate predicted target values." - }, - { - "name": "_use_cache", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return True if using a cache is beneficial.\n\nCaching may be beneficial when one of these conditions holds:\n - `_ProbaScorer` will be called twice.\n - `_PredictScorer` will be called twice.\n - `_ThresholdScorer` will be called twice.\n - `_ThresholdScorer` and `_PredictScorer` are called and\n estimator is a regressor.\n - `_ThresholdScorer` and `_ProbaScorer` are called and\n estimator does not have a `decision_function` attribute." - } - ], - "docstring": "Callable for multimetric scoring used to avoid repeated calls\nto `predict_proba`, `predict`, and `decision_function`.\n\n`_MultimetricScorer` will return a dictionary of scores corresponding to\nthe scorers in the dictionary. Note that `_MultimetricScorer` can be\ncreated with a dictionary with one key (i.e. only one actual scorer).\n\nParameters\n----------\nscorers : dict\n Dictionary mapping names to callable scorers." - }, - { - "name": "_BaseScorer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_select_proba_binary", - "decorators": [], - "parameters": [ - { - "name": "y_pred", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The prediction given by `predict_proba`." 
- }, - { - "name": "classes", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class labels for the estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Select the column of the positive label in `y_pred` when\nprobabilities are provided.\n\nParameters\n----------\ny_pred : ndarray of shape (n_samples, n_classes)\n The prediction given by `predict_proba`.\n\nclasses : ndarray of shape (n_classes,)\n The class labels for the estimator.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n Probability predictions of the positive class." - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Trained estimator to use for scoring. Must have a predict_proba method; the output of that is used to compute the score." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data that will be fed to estimator.predict." - }, - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gold standard target values for X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate predicted target values for X relative to y_true.\n\nParameters\n----------\nestimator : object\n Trained estimator to use for scoring. 
Must have a predict_proba\n method; the output of that is used to compute the score.\n\nX : {array-like, sparse matrix}\n Test data that will be fed to estimator.predict.\n\ny_true : array-like\n Gold standard target values for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Score function applied to prediction of estimator on X." - }, - { - "name": "_factory_args", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return non-default make_scorer arguments for repr." - } - ], - "docstring": null - }, - { - "name": "_PredictScorer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_score", - "decorators": [], - "parameters": [ - { - "name": "method_caller", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Returns predictions given an estimator, method name, and other arguments, potentially caching results." - }, - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Trained estimator to use for scoring. Must have a predict_proba method; the output of that is used to compute the score." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data that will be fed to estimator.predict." - }, - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gold standard target values for X." 
- }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate predicted target values for X relative to y_true.\n\nParameters\n----------\nmethod_caller : callable\n Returns predictions given an estimator, method name, and other\n arguments, potentially caching results.\n\nestimator : object\n Trained estimator to use for scoring. Must have a predict_proba\n method; the output of that is used to compute the score.\n\nX : {array-like, sparse matrix}\n Test data that will be fed to estimator.predict.\n\ny_true : array-like\n Gold standard target values for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Score function applied to prediction of estimator on X." - } - ], - "docstring": null - }, - { - "name": "_ProbaScorer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_score", - "decorators": [], - "parameters": [ - { - "name": "method_caller", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Returns predictions given an estimator, method name, and other arguments, potentially caching results." - }, - { - "name": "clf", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Trained classifier to use for scoring. Must have a predict_proba method; the output of that is used to compute the score." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data that will be fed to clf.predict_proba." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gold standard target values for X. These must be class labels, not probabilities." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate predicted probabilities for X relative to y_true.\n\nParameters\n----------\nmethod_caller : callable\n Returns predictions given an estimator, method name, and other\n arguments, potentially caching results.\n\nclf : object\n Trained classifier to use for scoring. Must have a predict_proba\n method; the output of that is used to compute the score.\n\nX : {array-like, sparse matrix}\n Test data that will be fed to clf.predict_proba.\n\ny : array-like\n Gold standard target values for X. These must be class labels,\n not probabilities.\n\nsample_weight : array-like, default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Score function applied to prediction of estimator on X." - }, - { - "name": "_factory_args", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "_ThresholdScorer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_score", - "decorators": [], - "parameters": [ - { - "name": "method_caller", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Returns predictions given an estimator, method name, and other arguments, potentially caching results." 
- }, - { - "name": "clf", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Trained classifier to use for scoring. Must have either a decision_function method or a predict_proba method; the output of that is used to compute the score." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test data that will be fed to clf.decision_function or clf.predict_proba." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gold standard target values for X. These must be class labels, not decision function values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate decision function output for X relative to y_true.\n\nParameters\n----------\nmethod_caller : callable\n Returns predictions given an estimator, method name, and other\n arguments, potentially caching results.\n\nclf : object\n Trained classifier to use for scoring. Must have either a\n decision_function method or a predict_proba method; the output of\n that is used to compute the score.\n\nX : {array-like, sparse matrix}\n Test data that will be fed to clf.decision_function or\n clf.predict_proba.\n\ny : array-like\n Gold standard target values for X. These must be class labels,\n not decision function values.\n\nsample_weight : array-like, default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Score function applied to prediction of estimator on X." 
- }, - { - "name": "_factory_args", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "_cached_call", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call estimator with method and args and kwargs." - }, - { - "name": "get_scorer", - "decorators": [], - "parameters": [ - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Scoring method as string. If callable it is returned as is." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get a scorer from string.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscoring : str or callable\n Scoring method as string. If callable it is returned as is.\n\nReturns\n-------\nscorer : callable\n The scorer." - }, - { - "name": "_passthrough_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Function that wraps estimator.score" - }, - { - "name": "check_scoring", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The object to use to fit the data." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``." - }, - { - "name": "allow_none", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If no scoring is specified and the estimator has no score function, we can either return None or raise an exception." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Determine scorer from user options.\n\nA TypeError will be thrown if the estimator cannot be scored.\n\nParameters\n----------\nestimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\nscoring : str or callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\nallow_none : bool, default=False\n If no scoring is specified and the estimator has no score function, we\n can either return None or raise an exception.\n\nReturns\n-------\nscoring : callable\n A scorer callable object / function with signature\n ``scorer(estimator, X, y)``." - }, - { - "name": "_check_multimetric_scoring", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator for which the scoring will be applied." - }, - { - "name": "scoring", - "type": "Union[List, Tuple[], Dict]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. For evaluating multiple metrics, either give a list of (unique) strings or a dict with names as keys and callables as values. See :ref:`multimetric_grid_search` for an example." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the scoring parameter in cases when multiple metrics are allowed.\n\nParameters\n----------\nestimator : sklearn estimator instance\n The estimator for which the scoring will be applied.\n\nscoring : list, tuple or dict\n A single string (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n For evaluating multiple metrics, either give a list of (unique) strings\n or a dict with names as keys and callables as values.\n\n See :ref:`multimetric_grid_search` for an example.\n\nReturns\n-------\nscorers_dict : dict\n A dict mapping each scorer name to its validated scorer." - }, - { - "name": "make_scorer", - "decorators": [], - "parameters": [ - { - "name": "score_func", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Score function (or loss function) with signature ``score_func(y, y_pred, **kwargs)``." - }, - { - "name": "greater_is_better", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether score_func is a score function (default), meaning high is good, or a loss function, meaning low is good. In the latter case, the scorer object will sign-flip the outcome of the score_func." - }, - { - "name": "needs_proba", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether score_func requires predict_proba to get probability estimates out of a classifier. If True, for binary `y_true`, the score function is supposed to accept a 1D `y_pred` (i.e., probability of the positive class, shape `(n_samples,)`)." - }, - { - "name": "needs_threshold", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether score_func takes a continuous decision certainty. 
This only works for binary classification using estimators that have either a decision_function or predict_proba method. If True, for binary `y_true`, the score function is supposed to accept a 1D `y_pred` (i.e., probability of the positive class or the decision function, shape `(n_samples,)`). For example ``average_precision`` or the area under the roc curve can not be computed using discrete predictions alone." - }, - { - "name": "**kwargs", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional parameters to be passed to score_func." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make a scorer from a performance metric or loss function.\n\nThis factory function wraps scoring functions for use in\n:class:`~sklearn.model_selection.GridSearchCV` and\n:func:`~sklearn.model_selection.cross_val_score`.\nIt takes a score function, such as :func:`~sklearn.metrics.accuracy_score`,\n:func:`~sklearn.metrics.mean_squared_error`,\n:func:`~sklearn.metrics.adjusted_rand_index` or\n:func:`~sklearn.metrics.average_precision`\nand returns a callable that scores an estimator's output.\nThe signature of the call is `(estimator, X, y)` where `estimator`\nis the model to be evaluated, `X` is the data and `y` is the\nground truth labeling (or `None` in the case of unsupervised models).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable\n Score function (or loss function) with signature\n ``score_func(y, y_pred, **kwargs)``.\n\ngreater_is_better : bool, default=True\n Whether score_func is a score function (default), meaning high is good,\n or a loss function, meaning low is good. 
In the latter case, the\n scorer object will sign-flip the outcome of the score_func.\n\nneeds_proba : bool, default=False\n Whether score_func requires predict_proba to get probability estimates\n out of a classifier.\n\n If True, for binary `y_true`, the score function is supposed to accept\n a 1D `y_pred` (i.e., probability of the positive class, shape\n `(n_samples,)`).\n\nneeds_threshold : bool, default=False\n Whether score_func takes a continuous decision certainty.\n This only works for binary classification using estimators that\n have either a decision_function or predict_proba method.\n\n If True, for binary `y_true`, the score function is supposed to accept\n a 1D `y_pred` (i.e., probability of the positive class or the decision\n function, shape `(n_samples,)`).\n\n For example ``average_precision`` or the area under the roc curve\n can not be computed using discrete predictions alone.\n\n**kwargs : additional arguments\n Additional parameters to be passed to score_func.\n\nReturns\n-------\nscorer : callable\n Callable object that returns a scalar score; greater is better.\n\nExamples\n--------\n>>> from sklearn.metrics import fbeta_score, make_scorer\n>>> ftwo_scorer = make_scorer(fbeta_score, beta=2)\n>>> ftwo_scorer\nmake_scorer(fbeta_score, beta=2)\n>>> from sklearn.model_selection import GridSearchCV\n>>> from sklearn.svm import LinearSVC\n>>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]},\n... scoring=ftwo_scorer)\n\nNotes\n-----\nIf `needs_proba=False` and `needs_threshold=False`, the score\nfunction is supposed to accept the output of :term:`predict`. If\n`needs_proba=True`, the score function is supposed to accept the\noutput of :term:`predict_proba` (For binary `y_true`, the score function is\nsupposed to accept probability of the positive class). If\n`needs_threshold=True`, the score function is supposed to accept the\noutput of :term:`decision_function`." 
- } - ] - }, - { - "name": "sklearn.metrics", - "imports": [ - "from _ranking import auc", - "from _ranking import average_precision_score", - "from _ranking import coverage_error", - "from _ranking import det_curve", - "from _ranking import dcg_score", - "from _ranking import label_ranking_average_precision_score", - "from _ranking import label_ranking_loss", - "from _ranking import ndcg_score", - "from _ranking import precision_recall_curve", - "from _ranking import roc_auc_score", - "from _ranking import roc_curve", - "from _ranking import top_k_accuracy_score", - "from _classification import accuracy_score", - "from _classification import balanced_accuracy_score", - "from _classification import classification_report", - "from _classification import cohen_kappa_score", - "from _classification import confusion_matrix", - "from _classification import f1_score", - "from _classification import fbeta_score", - "from _classification import hamming_loss", - "from _classification import hinge_loss", - "from _classification import jaccard_score", - "from _classification import log_loss", - "from _classification import matthews_corrcoef", - "from _classification import precision_recall_fscore_support", - "from _classification import precision_score", - "from _classification import recall_score", - "from _classification import zero_one_loss", - "from _classification import brier_score_loss", - "from _classification import multilabel_confusion_matrix", - "from None import cluster", - "from cluster import adjusted_mutual_info_score", - "from cluster import adjusted_rand_score", - "from cluster import rand_score", - "from cluster import pair_confusion_matrix", - "from cluster import completeness_score", - "from cluster import consensus_score", - "from cluster import homogeneity_completeness_v_measure", - "from cluster import homogeneity_score", - "from cluster import mutual_info_score", - "from cluster import normalized_mutual_info_score", - "from cluster import 
fowlkes_mallows_score", - "from cluster import silhouette_samples", - "from cluster import silhouette_score", - "from cluster import calinski_harabasz_score", - "from cluster import v_measure_score", - "from cluster import davies_bouldin_score", - "from pairwise import euclidean_distances", - "from pairwise import nan_euclidean_distances", - "from pairwise import pairwise_distances", - "from pairwise import pairwise_distances_argmin", - "from pairwise import pairwise_distances_argmin_min", - "from pairwise import pairwise_kernels", - "from pairwise import pairwise_distances_chunked", - "from _regression import explained_variance_score", - "from _regression import max_error", - "from _regression import mean_absolute_error", - "from _regression import mean_squared_error", - "from _regression import mean_squared_log_error", - "from _regression import median_absolute_error", - "from _regression import mean_absolute_percentage_error", - "from _regression import r2_score", - "from _regression import mean_tweedie_deviance", - "from _regression import mean_poisson_deviance", - "from _regression import mean_gamma_deviance", - "from _scorer import check_scoring", - "from _scorer import make_scorer", - "from _scorer import SCORERS", - "from _scorer import get_scorer", - "from _plot.det_curve import plot_det_curve", - "from _plot.det_curve import DetCurveDisplay", - "from _plot.roc_curve import plot_roc_curve", - "from _plot.roc_curve import RocCurveDisplay", - "from _plot.precision_recall_curve import plot_precision_recall_curve", - "from _plot.precision_recall_curve import PrecisionRecallDisplay", - "from _plot.confusion_matrix import plot_confusion_matrix", - "from _plot.confusion_matrix import ConfusionMatrixDisplay" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.metrics.cluster.setup", - "imports": [ - "import os", - "import numpy", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": 
[], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.cluster._bicluster", - "imports": [ - "import numpy as np", - "from scipy.optimize import linear_sum_assignment", - "from utils.validation import check_consistent_length", - "from utils.validation import check_array", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "_check_rows_and_columns", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Unpacks the row and column arrays and checks their shape." - }, - { - "name": "_jaccard", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Jaccard coefficient on the elements of the two biclusters." - }, - { - "name": "_pairwise_similarity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes pairwise similarity matrix.\n\nresult[i, j] is the Jaccard coefficient of a's bicluster i and b's\nbicluster j." - }, - { - "name": "consensus_score", - "decorators": [], - "parameters": [ - { - "name": "a", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Tuple of row and column indicators for a set of biclusters." - }, - { - "name": "b", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Another set of biclusters like ``a``." 
- }, - { - "name": "similarity", - "type": "Union[Callable, Literal['jaccard']]", - "hasDefault": true, - "default": "'jaccard'", - "limitation": null, - "ignored": false, - "docstring": "May be the string \"jaccard\" to use the Jaccard coefficient, or any function that takes four arguments, each of which is a 1d indicator vector: (a_rows, a_columns, b_rows, b_columns)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The similarity of two sets of biclusters.\n\nSimilarity between individual biclusters is computed. Then the\nbest matching between sets is found using the Hungarian algorithm.\nThe final score is the sum of similarities divided by the size of\nthe larger set.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\na : (rows, columns)\n Tuple of row and column indicators for a set of biclusters.\n\nb : (rows, columns)\n Another set of biclusters like ``a``.\n\nsimilarity : 'jaccard' or callable, default='jaccard'\n May be the string \"jaccard\" to use the Jaccard coefficient, or\n any function that takes four arguments, each of which is a 1d\n indicator vector: (a_rows, a_columns, b_rows, b_columns).\n\nReferences\n----------\n\n* Hochreiter, Bodenhofer, et. al., 2010. `FABIA: factor analysis\n for bicluster acquisition\n `__." 
- } - ] - }, - { - "name": "sklearn.metrics.cluster._supervised", - "imports": [ - "import warnings", - "from math import log", - "import numpy as np", - "from scipy import sparse as sp", - "from _expected_mutual_info_fast import expected_mutual_information", - "from utils.fixes import _astype_copy_false", - "from utils.multiclass import type_of_target", - "from utils.validation import _deprecate_positional_args", - "from utils.validation import check_array", - "from utils.validation import check_consistent_length" - ], - "classes": [], - "functions": [ - { - "name": "check_clusterings", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The true labels." - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The predicted labels." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that the labels arrays are 1D and of same dimension.\n\nParameters\n----------\nlabels_true : array-like of shape (n_samples,)\n The true labels.\n\nlabels_pred : array-like of shape (n_samples,)\n The predicted labels." - }, - { - "name": "_generalized_average", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return a particular mean of two numbers." - }, - { - "name": "contingency_matrix", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth class labels to be used as a reference." - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Cluster labels to evaluate." 
- }, - { - "name": "eps", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If a float, that value is added to all values in the contingency matrix. This helps to stop NaN propagation. If ``None``, nothing is adjusted." - }, - { - "name": "sparse", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If `True`, return a sparse CSR continency matrix. If `eps` is not `None` and `sparse` is `True` will raise ValueError. .. versionadded:: 0.18" - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Output dtype. Ignored if `eps` is not `None`. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Build a contingency matrix describing the relationship between labels.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n Ground truth class labels to be used as a reference.\n\nlabels_pred : array-like of shape (n_samples,)\n Cluster labels to evaluate.\n\neps : float, default=None\n If a float, that value is added to all values in the contingency\n matrix. This helps to stop NaN propagation.\n If ``None``, nothing is adjusted.\n\nsparse : bool, default=False\n If `True`, return a sparse CSR continency matrix. If `eps` is not\n `None` and `sparse` is `True` will raise ValueError.\n\n .. versionadded:: 0.18\n\ndtype : numeric type, default=np.int64\n Output dtype. Ignored if `eps` is not `None`.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ncontingency : {array-like, sparse}, shape=[n_classes_true, n_classes_pred]\n Matrix :math:`C` such that :math:`C_{i, j}` is the number of samples in\n true class :math:`i` and in predicted class :math:`j`. If\n ``eps is None``, the dtype of this array will be integer unless set\n otherwise with the ``dtype`` argument. 
If ``eps`` is given, the dtype\n will be float.\n Will be a ``sklearn.sparse.csr_matrix`` if ``sparse=True``." - }, - { - "name": "pair_confusion_matrix", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth class labels to be used as a reference." - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Cluster labels to evaluate." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Pair confusion matrix arising from two clusterings.\n\nThe pair confusion matrix :math:`C` computes a 2 by 2 similarity matrix\nbetween two clusterings by considering all pairs of samples and counting\npairs that are assigned into the same or into different clusters under\nthe true and predicted clusterings.\n\nConsidering a pair of samples that is clustered together a positive pair,\nthen as in binary classification the count of true negatives is\n:math:`C_{00}`, false negatives is :math:`C_{10}`, true positives is\n:math:`C_{11}` and false positives is :math:`C_{01}`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : array-like of shape (n_samples,), dtype=integral\n Ground truth class labels to be used as a reference.\n\nlabels_pred : array-like of shape (n_samples,), dtype=integral\n Cluster labels to evaluate.\n\nReturns\n-------\nC : ndarray of shape (2, 2), dtype=np.int64\n The contingency matrix.\n\nSee Also\n--------\nrand_score: Rand Score\nadjusted_rand_score: Adjusted Rand Score\nadjusted_mutual_info_score: Adjusted Mutual Information\n\nExamples\n--------\nPerfectly matching labelings have all non-zero entries on the\ndiagonal regardless of actual label values:\n\n >>> from sklearn.metrics.cluster import pair_confusion_matrix\n >>> pair_confusion_matrix([0, 0, 1, 1], [1, 
1, 0, 0])\n array([[8, 0],\n [0, 4]]...\n\nLabelings that assign all classes members to the same clusters\nare complete but may be not always pure, hence penalized, and\nhave some off-diagonal non-zero entries:\n\n >>> pair_confusion_matrix([0, 0, 1, 2], [0, 0, 1, 1])\n array([[8, 2],\n [0, 2]]...\n\nNote that the matrix is not symmetric.\n\nReferences\n----------\n.. L. Hubert and P. Arabie, Comparing Partitions, Journal of\n Classification 1985\n https://link.springer.com/article/10.1007%2FBF01908075" - }, - { - "name": "rand_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth class labels to be used as a reference." - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Cluster labels to evaluate." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Rand index.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is:\n\n RI = (number of agreeing pairs) / (number of pairs)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : array-like of shape (n_samples,), dtype=integral\n Ground truth class labels to be used as a reference.\n\nlabels_pred : array-like of shape (n_samples,), dtype=integral\n Cluster labels to evaluate.\n\nReturns\n-------\nRI : float\n Similarity score between 0.0 and 1.0, inclusive, 1.0 stands for\n perfect match.\n\nSee Also\n--------\nadjusted_rand_score: Adjusted Rand Score\nadjusted_mutual_info_score: Adjusted Mutual Information\n\nExamples\n--------\nPerfectly matching labelings have a score of 1 even\n\n >>> from sklearn.metrics.cluster import 
rand_score\n >>> rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nLabelings that assign all classes members to the same clusters\nare complete but may not always be pure, hence penalized:\n\n >>> rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n 0.83...\n\nReferences\n----------\n.. L. Hubert and P. Arabie, Comparing Partitions, Journal of\n Classification 1985\n https://link.springer.com/article/10.1007%2FBF01908075\n\n.. https://en.wikipedia.org/wiki/Simple_matching_coefficient\n\n.. https://en.wikipedia.org/wiki/Rand_index" - }, - { - "name": "adjusted_rand_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth class labels to be used as a reference" - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Cluster labels to evaluate" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Rand index adjusted for chance.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is then \"adjusted for chance\" into the ARI score\nusing the following scheme::\n\n ARI = (RI - Expected_RI) / (max(RI) - Expected_RI)\n\nThe adjusted Rand index is thus ensured to have a value close to\n0.0 for random labeling independently of the number of clusters and\nsamples and exactly 1.0 when the clusterings are identical (up to\na permutation).\n\nARI is a symmetric measure::\n\n adjusted_rand_score(a, b) == adjusted_rand_score(b, a)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n Ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape 
(n_samples,)\n Cluster labels to evaluate\n\nReturns\n-------\nARI : float\n Similarity score between -1.0 and 1.0. Random labelings have an ARI\n close to 0.0. 1.0 stands for perfect match.\n\nExamples\n--------\nPerfectly matching labelings have a score of 1 even\n\n >>> from sklearn.metrics.cluster import adjusted_rand_score\n >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 1])\n 1.0\n >>> adjusted_rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nLabelings that assign all classes members to the same clusters\nare complete but may not always be pure, hence penalized::\n\n >>> adjusted_rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n 0.57...\n\nARI is symmetric, so labelings that have pure clusters with members\ncoming from the same classes but unnecessary splits are penalized::\n\n >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 2])\n 0.57...\n\nIf classes members are completely split across different clusters, the\nassignment is totally incomplete, hence the ARI is very low::\n\n >>> adjusted_rand_score([0, 0, 0, 0], [0, 1, 2, 3])\n 0.0\n\nReferences\n----------\n.. [Hubert1985] L. Hubert and P. Arabie, Comparing Partitions,\n Journal of Classification 1985\n https://link.springer.com/article/10.1007%2FBF01908075\n\n.. [Steinley2004] D. Steinley, Properties of the Hubert-Arabie\n adjusted Rand index, Psychological Methods 2004\n\n.. [wk] https://en.wikipedia.org/wiki/Rand_index#Adjusted_Rand_index\n\nSee Also\n--------\nadjusted_mutual_info_score : Adjusted Mutual Information." 
- }, - { - "name": "homogeneity_completeness_v_measure", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "ground truth class labels to be used as a reference" - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "cluster labels to evaluate" - }, - { - "name": "beta", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Ratio of weight attributed to ``homogeneity`` vs ``completeness``. If ``beta`` is greater than 1, ``completeness`` is weighted more strongly in the calculation. If ``beta`` is less than 1, ``homogeneity`` is weighted more strongly." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the homogeneity and completeness and V-Measure scores at once.\n\nThose metrics are based on normalized conditional entropy measures of\nthe clustering labeling to evaluate given the knowledge of a Ground\nTruth class labels of the same samples.\n\nA clustering result satisfies homogeneity if all of its clusters\ncontain only data points which are members of a single class.\n\nA clustering result satisfies completeness if all the data points\nthat are members of a given class are elements of the same cluster.\n\nBoth scores have positive values between 0.0 and 1.0, larger values\nbeing desirable.\n\nThose 3 metrics are independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore values in any way.\n\nV-Measure is furthermore symmetric: swapping ``labels_true`` and\n``label_pred`` will give the same score. This does not hold for\nhomogeneity and completeness. 
V-Measure is identical to\n:func:`normalized_mutual_info_score` with the arithmetic averaging\nmethod.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape (n_samples,)\n cluster labels to evaluate\n\nbeta : float, default=1.0\n Ratio of weight attributed to ``homogeneity`` vs ``completeness``.\n If ``beta`` is greater than 1, ``completeness`` is weighted more\n strongly in the calculation. If ``beta`` is less than 1,\n ``homogeneity`` is weighted more strongly.\n\nReturns\n-------\nhomogeneity : float\n score between 0.0 and 1.0. 1.0 stands for perfectly homogeneous labeling\n\ncompleteness : float\n score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling\n\nv_measure : float\n harmonic mean of the first two\n\nSee Also\n--------\nhomogeneity_score\ncompleteness_score\nv_measure_score" - }, - { - "name": "homogeneity_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "ground truth class labels to be used as a reference" - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "cluster labels to evaluate" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Homogeneity metric of a cluster labeling given a ground truth.\n\nA clustering result satisfies homogeneity if all of its clusters\ncontain only data points which are members of a single class.\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is not symmetric: switching ``label_true`` with ``label_pred``\nwill return the :func:`completeness_score` which will be 
different in\ngeneral.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape (n_samples,)\n cluster labels to evaluate\n\nReturns\n-------\nhomogeneity : float\n score between 0.0 and 1.0. 1.0 stands for perfectly homogeneous labeling\n\nReferences\n----------\n\n.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. V-Measure: A\n conditional entropy-based external cluster evaluation measure\n `_\n\nSee Also\n--------\ncompleteness_score\nv_measure_score\n\nExamples\n--------\n\nPerfect labelings are homogeneous::\n\n >>> from sklearn.metrics.cluster import homogeneity_score\n >>> homogeneity_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nNon-perfect labelings that further split classes into more clusters can be\nperfectly homogeneous::\n\n >>> print(\"%.6f\" % homogeneity_score([0, 0, 1, 1], [0, 0, 1, 2]))\n 1.000000\n >>> print(\"%.6f\" % homogeneity_score([0, 0, 1, 1], [0, 1, 2, 3]))\n 1.000000\n\nClusters that include samples from different classes do not make for an\nhomogeneous labeling::\n\n >>> print(\"%.6f\" % homogeneity_score([0, 0, 1, 1], [0, 1, 0, 1]))\n 0.0...\n >>> print(\"%.6f\" % homogeneity_score([0, 0, 1, 1], [0, 0, 0, 0]))\n 0.0..." 
- }, - { - "name": "completeness_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "ground truth class labels to be used as a reference" - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "cluster labels to evaluate" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Completeness metric of a cluster labeling given a ground truth.\n\nA clustering result satisfies completeness if all the data points\nthat are members of a given class are elements of the same cluster.\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is not symmetric: switching ``label_true`` with ``label_pred``\nwill return the :func:`homogeneity_score` which will be different in\ngeneral.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape (n_samples,)\n cluster labels to evaluate\n\nReturns\n-------\ncompleteness : float\n score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling\n\nReferences\n----------\n\n.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. 
V-Measure: A\n conditional entropy-based external cluster evaluation measure\n `_\n\nSee Also\n--------\nhomogeneity_score\nv_measure_score\n\nExamples\n--------\n\nPerfect labelings are complete::\n\n >>> from sklearn.metrics.cluster import completeness_score\n >>> completeness_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nNon-perfect labelings that assign all classes members to the same clusters\nare still complete::\n\n >>> print(completeness_score([0, 0, 1, 1], [0, 0, 0, 0]))\n 1.0\n >>> print(completeness_score([0, 1, 2, 3], [0, 0, 1, 1]))\n 0.999...\n\nIf classes members are split across different clusters, the\nassignment cannot be complete::\n\n >>> print(completeness_score([0, 0, 1, 1], [0, 1, 0, 1]))\n 0.0\n >>> print(completeness_score([0, 0, 0, 0], [0, 1, 2, 3]))\n 0.0" - }, - { - "name": "v_measure_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "ground truth class labels to be used as a reference" - }, - { - "name": "labels_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "cluster labels to evaluate" - }, - { - "name": "beta", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Ratio of weight attributed to ``homogeneity`` vs ``completeness``. If ``beta`` is greater than 1, ``completeness`` is weighted more strongly in the calculation. If ``beta`` is less than 1, ``homogeneity`` is weighted more strongly." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "V-measure cluster labeling given a ground truth.\n\nThis score is identical to :func:`normalized_mutual_info_score` with\nthe ``'arithmetic'`` option for averaging.\n\nThe V-measure is the harmonic mean between homogeneity and completeness::\n\n v = (1 + beta) * homogeneity * completeness\n / (beta * homogeneity + completeness)\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape (n_samples,)\n cluster labels to evaluate\n\nbeta : float, default=1.0\n Ratio of weight attributed to ``homogeneity`` vs ``completeness``.\n If ``beta`` is greater than 1, ``completeness`` is weighted more\n strongly in the calculation. If ``beta`` is less than 1,\n ``homogeneity`` is weighted more strongly.\n\nReturns\n-------\nv_measure : float\n score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling\n\nReferences\n----------\n\n.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. 
V-Measure: A\n conditional entropy-based external cluster evaluation measure\n `_\n\nSee Also\n--------\nhomogeneity_score\ncompleteness_score\nnormalized_mutual_info_score\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have score 1.0::\n\n >>> from sklearn.metrics.cluster import v_measure_score\n >>> v_measure_score([0, 0, 1, 1], [0, 0, 1, 1])\n 1.0\n >>> v_measure_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nLabelings that assign all classes members to the same clusters\nare complete be not homogeneous, hence penalized::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 2], [0, 0, 1, 1]))\n 0.8...\n >>> print(\"%.6f\" % v_measure_score([0, 1, 2, 3], [0, 0, 1, 1]))\n 0.66...\n\nLabelings that have pure clusters with members coming from the same\nclasses are homogeneous but un-necessary splits harms completeness\nand thus penalize V-measure as well::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 1, 2]))\n 0.8...\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 1, 2, 3]))\n 0.66...\n\nIf classes members are completely split across different clusters,\nthe assignment is totally incomplete, hence the V-Measure is null::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 0, 0], [0, 1, 2, 3]))\n 0.0...\n\nClusters that include samples from totally different classes totally\ndestroy the homogeneity of the labeling, hence::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 0, 0]))\n 0.0..." - }, - { - "name": "mutual_info_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A clustering of the data into disjoint subsets." - }, - { - "name": "labels_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A clustering of the data into disjoint subsets." 
- }, - { - "name": "contingency", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A contingency matrix given by the :func:`contingency_matrix` function. If value is ``None``, it will be computed, otherwise the given value is used, with ``labels_true`` and ``labels_pred`` ignored." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Mutual Information between two clusterings.\n\nThe Mutual Information is a measure of the similarity between two labels of\nthe same data. Where :math:`|U_i|` is the number of the samples\nin cluster :math:`U_i` and :math:`|V_j|` is the number of the\nsamples in cluster :math:`V_j`, the Mutual Information\nbetween clusterings :math:`U` and :math:`V` is given as:\n\n.. math::\n\n MI(U,V)=\\sum_{i=1}^{|U|} \\sum_{j=1}^{|V|} \\frac{|U_i\\cap V_j|}{N}\n \\log\\frac{N|U_i \\cap V_j|}{|U_i||V_j|}\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. 
This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n A clustering of the data into disjoint subsets.\n\nlabels_pred : int array-like of shape (n_samples,)\n A clustering of the data into disjoint subsets.\n\ncontingency : {ndarray, sparse matrix} of shape (n_classes_true, n_classes_pred), default=None\n A contingency matrix given by the :func:`contingency_matrix` function.\n If value is ``None``, it will be computed, otherwise the given value is\n used, with ``labels_true`` and ``labels_pred`` ignored.\n\nReturns\n-------\nmi : float\n Mutual information, a non-negative value\n\nNotes\n-----\nThe logarithm used is the natural logarithm (base-e).\n\nSee Also\n--------\nadjusted_mutual_info_score : Adjusted against chance Mutual Information.\nnormalized_mutual_info_score : Normalized Mutual Information." - }, - { - "name": "adjusted_mutual_info_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A clustering of the data into disjoint subsets." - }, - { - "name": "labels_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A clustering of the data into disjoint subsets." - }, - { - "name": "average_method", - "type": "str", - "hasDefault": true, - "default": "'arithmetic'", - "limitation": null, - "ignored": false, - "docstring": "How to compute the normalizer in the denominator. Possible options are 'min', 'geometric', 'arithmetic', and 'max'. .. versionadded:: 0.20 .. versionchanged:: 0.22 The default value of ``average_method`` changed from 'max' to 'arithmetic'." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Adjusted Mutual Information between two clusterings.\n\nAdjusted Mutual Information (AMI) is an adjustment of the Mutual\nInformation (MI) score to account for chance. It accounts for the fact that\nthe MI is generally higher for two clusterings with a larger number of\nclusters, regardless of whether there is actually more information shared.\nFor two clusterings :math:`U` and :math:`V`, the AMI is given as::\n\n AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [avg(H(U), H(V)) - E(MI(U, V))]\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\nBe mindful that this function is an order of magnitude slower than other\nmetrics, such as the Adjusted Rand Index.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n A clustering of the data into disjoint subsets.\n\nlabels_pred : int array-like of shape (n_samples,)\n A clustering of the data into disjoint subsets.\n\naverage_method : str, default='arithmetic'\n How to compute the normalizer in the denominator. Possible options\n are 'min', 'geometric', 'arithmetic', and 'max'.\n\n .. versionadded:: 0.20\n\n .. versionchanged:: 0.22\n The default value of ``average_method`` changed from 'max' to\n 'arithmetic'.\n\nReturns\n-------\nami: float (upperlimited by 1.0)\n The AMI returns a value of 1 when the two partitions are identical\n (ie perfectly matched). 
Random partitions (independent labellings) have\n an expected AMI around 0 on average hence can be negative.\n\nSee Also\n--------\nadjusted_rand_score : Adjusted Rand Index.\nmutual_info_score : Mutual Information (not adjusted for chance).\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have\nscore 1.0::\n\n >>> from sklearn.metrics.cluster import adjusted_mutual_info_score\n >>> adjusted_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])\n ... # doctest: +SKIP\n 1.0\n >>> adjusted_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])\n ... # doctest: +SKIP\n 1.0\n\nIf classes members are completely split across different clusters,\nthe assignment is totally in-complete, hence the AMI is null::\n\n >>> adjusted_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])\n ... # doctest: +SKIP\n 0.0\n\nReferences\n----------\n.. [1] `Vinh, Epps, and Bailey, (2010). Information Theoretic Measures for\n Clusterings Comparison: Variants, Properties, Normalization and\n Correction for Chance, JMLR\n `_\n\n.. [2] `Wikipedia entry for the Adjusted Mutual Information\n `_" - }, - { - "name": "normalized_mutual_info_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A clustering of the data into disjoint subsets." - }, - { - "name": "labels_pred", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A clustering of the data into disjoint subsets." - }, - { - "name": "average_method", - "type": "str", - "hasDefault": true, - "default": "'arithmetic'", - "limitation": null, - "ignored": false, - "docstring": "How to compute the normalizer in the denominator. Possible options are 'min', 'geometric', 'arithmetic', and 'max'. .. versionadded:: 0.20 .. versionchanged:: 0.22 The default value of ``average_method`` changed from 'geometric' to 'arithmetic'." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Normalized Mutual Information between two clusterings.\n\nNormalized Mutual Information (NMI) is a normalization of the Mutual\nInformation (MI) score to scale the results between 0 (no mutual\ninformation) and 1 (perfect correlation). In this function, mutual\ninformation is normalized by some generalized mean of ``H(labels_true)``\nand ``H(labels_pred))``, defined by the `average_method`.\n\nThis measure is not adjusted for chance. Therefore\n:func:`adjusted_mutual_info_score` might be preferred.\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n A clustering of the data into disjoint subsets.\n\nlabels_pred : int array-like of shape (n_samples,)\n A clustering of the data into disjoint subsets.\n\naverage_method : str, default='arithmetic'\n How to compute the normalizer in the denominator. Possible options\n are 'min', 'geometric', 'arithmetic', and 'max'.\n\n .. versionadded:: 0.20\n\n .. versionchanged:: 0.22\n The default value of ``average_method`` changed from 'geometric' to\n 'arithmetic'.\n\nReturns\n-------\nnmi : float\n score between 0.0 and 1.0. 
1.0 stands for perfectly complete labeling\n\nSee Also\n--------\nv_measure_score : V-Measure (NMI with arithmetic mean option).\nadjusted_rand_score : Adjusted Rand Index.\nadjusted_mutual_info_score : Adjusted Mutual Information (adjusted\n against chance).\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have\nscore 1.0::\n\n >>> from sklearn.metrics.cluster import normalized_mutual_info_score\n >>> normalized_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])\n ... # doctest: +SKIP\n 1.0\n >>> normalized_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])\n ... # doctest: +SKIP\n 1.0\n\nIf classes members are completely split across different clusters,\nthe assignment is totally in-complete, hence the NMI is null::\n\n >>> normalized_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])\n ... # doctest: +SKIP\n 0.0" - }, - { - "name": "fowlkes_mallows_score", - "decorators": [], - "parameters": [ - { - "name": "labels_true", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A clustering of the data into disjoint subsets." - }, - { - "name": "labels_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A clustering of the data into disjoint subsets." - }, - { - "name": "sparse", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Compute contingency matrix internally with sparse matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Measure the similarity of two clusterings of a set of points.\n\n.. versionadded:: 0.18\n\nThe Fowlkes-Mallows index (FMI) is defined as the geometric mean between of\nthe precision and recall::\n\n FMI = TP / sqrt((TP + FP) * (TP + FN))\n\nWhere ``TP`` is the number of **True Positive** (i.e. 
the number of pair of\npoints that belongs in the same clusters in both ``labels_true`` and\n``labels_pred``), ``FP`` is the number of **False Positive** (i.e. the\nnumber of pair of points that belongs in the same clusters in\n``labels_true`` and not in ``labels_pred``) and ``FN`` is the number of\n**False Negative** (i.e the number of pair of points that belongs in the\nsame clusters in ``labels_pred`` and not in ``labels_True``).\n\nThe score ranges from 0 to 1. A high value indicates a good similarity\nbetween two clusters.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = (``n_samples``,)\n A clustering of the data into disjoint subsets.\n\nlabels_pred : array, shape = (``n_samples``, )\n A clustering of the data into disjoint subsets.\n\nsparse : bool, default=False\n Compute contingency matrix internally with sparse matrix.\n\nReturns\n-------\nscore : float\n The resulting Fowlkes-Mallows score.\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have\nscore 1.0::\n\n >>> from sklearn.metrics.cluster import fowlkes_mallows_score\n >>> fowlkes_mallows_score([0, 0, 1, 1], [0, 0, 1, 1])\n 1.0\n >>> fowlkes_mallows_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nIf classes members are completely split across different clusters,\nthe assignment is totally random, hence the FMI is null::\n\n >>> fowlkes_mallows_score([0, 0, 0, 0], [0, 1, 2, 3])\n 0.0\n\nReferences\n----------\n.. [1] `E. B. Fowkles and C. L. Mallows, 1983. \"A method for comparing two\n hierarchical clusterings\". Journal of the American Statistical\n Association\n `_\n\n.. 
[2] `Wikipedia entry for the Fowlkes-Mallows Index\n `_" - }, - { - "name": "entropy", - "decorators": [], - "parameters": [ - { - "name": "labels", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The labels" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculates the entropy for a labeling.\n\nParameters\n----------\nlabels : int array, shape = [n_samples]\n The labels\n\nNotes\n-----\nThe logarithm used is the natural logarithm (base-e)." - } - ] - }, - { - "name": "sklearn.metrics.cluster._unsupervised", - "imports": [ - "import functools", - "import numpy as np", - "from utils import check_random_state", - "from utils import check_X_y", - "from utils import _safe_indexing", - "from pairwise import pairwise_distances_chunked", - "from pairwise import pairwise_distances", - "from preprocessing import LabelEncoder", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "check_number_of_labels", - "decorators": [], - "parameters": [ - { - "name": "n_labels", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of labels." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that number of labels are valid.\n\nParameters\n----------\nn_labels : int\n Number of labels.\n\nn_samples : int\n Number of samples." - }, - { - "name": "silhouette_score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An array of pairwise distances between samples, or a feature array." 
- }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted labels for each sample." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options allowed by :func:`metrics.pairwise.pairwise_distances `. If ``X`` is the distance array itself, use ``metric=\"precomputed\"``." - }, - { - "name": "sample_size", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The size of the sample to use when computing the Silhouette Coefficient on a random subset of the data. If ``sample_size is None``, no sampling is used." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for selecting a subset of samples. Used when ``sample_size is not None``. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "**kwds", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Any further parameters are passed directly to the distance function. If using a scipy.spatial.distance metric, the parameters are still metric dependent. See the scipy docs for usage examples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the mean Silhouette Coefficient of all samples.\n\nThe Silhouette Coefficient is calculated using the mean intra-cluster\ndistance (``a``) and the mean nearest-cluster distance (``b``) for each\nsample. The Silhouette Coefficient for a sample is ``(b - a) / max(a,\nb)``. 
To clarify, ``b`` is the distance between a sample and the nearest\ncluster that the sample is not a part of.\nNote that Silhouette Coefficient is only defined if number of labels\nis ``2 <= n_labels <= n_samples - 1``.\n\nThis function returns the mean Silhouette Coefficient over all samples.\nTo obtain the values for each sample, use :func:`silhouette_samples`.\n\nThe best value is 1 and the worst value is -1. Values near 0 indicate\noverlapping clusters. Negative values generally indicate that a sample has\nbeen assigned to the wrong cluster, as a different cluster is more similar.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples_a, n_samples_a) if metric == \"precomputed\" or (n_samples_a, n_features) otherwise\n An array of pairwise distances between samples, or a feature array.\n\nlabels : array-like of shape (n_samples,)\n Predicted labels for each sample.\n\nmetric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by :func:`metrics.pairwise.pairwise_distances\n `. If ``X`` is\n the distance array itself, use ``metric=\"precomputed\"``.\n\nsample_size : int, default=None\n The size of the sample to use when computing the Silhouette Coefficient\n on a random subset of the data.\n If ``sample_size is None``, no sampling is used.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for selecting a subset of samples.\n Used when ``sample_size is not None``.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n**kwds : optional keyword parameters\n Any further parameters are passed directly to the distance function.\n If using a scipy.spatial.distance metric, the parameters are still\n metric dependent. 
See the scipy docs for usage examples.\n\nReturns\n-------\nsilhouette : float\n Mean Silhouette Coefficient for all samples.\n\nReferences\n----------\n\n.. [1] `Peter J. Rousseeuw (1987). \"Silhouettes: a Graphical Aid to the\n Interpretation and Validation of Cluster Analysis\". Computational\n and Applied Mathematics 20: 53-65.\n `_\n\n.. [2] `Wikipedia entry on the Silhouette Coefficient\n `_" - }, - { - "name": "_silhouette_reduce", - "decorators": [], - "parameters": [ - { - "name": "D_chunk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precomputed distances for a chunk." - }, - { - "name": "start", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "First index in the chunk." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Corresponding cluster labels, encoded as {0, ..., n_clusters-1}." - }, - { - "name": "label_freqs", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Distribution of cluster labels in ``labels``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Accumulate silhouette statistics for vertical chunk of X.\n\nParameters\n----------\nD_chunk : array-like of shape (n_chunk_samples, n_samples)\n Precomputed distances for a chunk.\nstart : int\n First index in the chunk.\nlabels : array-like of shape (n_samples,)\n Corresponding cluster labels, encoded as {0, ..., n_clusters-1}.\nlabel_freqs : array-like\n Distribution of cluster labels in ``labels``." 
- }, - { - "name": "silhouette_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An array of pairwise distances between samples, or a feature array." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Label values for each sample." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'euclidean'", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options allowed by :func:`sklearn.metrics.pairwise.pairwise_distances`. If ``X`` is the distance array itself, use \"precomputed\" as the metric. Precomputed distance matrices must have 0 along the diagonal." - }, - { - "name": "`**kwds`", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Any further parameters are passed directly to the distance function. If using a ``scipy.spatial.distance`` metric, the parameters are still metric dependent. See the scipy docs for usage examples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Silhouette Coefficient for each sample.\n\nThe Silhouette Coefficient is a measure of how well samples are clustered\nwith samples that are similar to themselves. Clustering models with a high\nSilhouette Coefficient are said to be dense, where samples in the same\ncluster are similar to each other, and well separated, where samples in\ndifferent clusters are not very similar to each other.\n\nThe Silhouette Coefficient is calculated using the mean intra-cluster\ndistance (``a``) and the mean nearest-cluster distance (``b``) for each\nsample. 
The Silhouette Coefficient for a sample is ``(b - a) / max(a,\nb)``.\nNote that Silhouette Coefficient is only defined if number of labels\nis 2 ``<= n_labels <= n_samples - 1``.\n\nThis function returns the Silhouette Coefficient for each sample.\n\nThe best value is 1 and the worst value is -1. Values near 0 indicate\noverlapping clusters.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples_a, n_samples_a) if metric == \"precomputed\" or (n_samples_a, n_features) otherwise\n An array of pairwise distances between samples, or a feature array.\n\nlabels : array-like of shape (n_samples,)\n Label values for each sample.\n\nmetric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by :func:`sklearn.metrics.pairwise.pairwise_distances`.\n If ``X`` is the distance array itself, use \"precomputed\" as the metric.\n Precomputed distance matrices must have 0 along the diagonal.\n\n`**kwds` : optional keyword parameters\n Any further parameters are passed directly to the distance function.\n If using a ``scipy.spatial.distance`` metric, the parameters are still\n metric dependent. See the scipy docs for usage examples.\n\nReturns\n-------\nsilhouette : array-like of shape (n_samples,)\n Silhouette Coefficients for each sample.\n\nReferences\n----------\n\n.. [1] `Peter J. Rousseeuw (1987). \"Silhouettes: a Graphical Aid to the\n Interpretation and Validation of Cluster Analysis\". Computational\n and Applied Mathematics 20: 53-65.\n `_\n\n.. [2] `Wikipedia entry on the Silhouette Coefficient\n `_" - }, - { - "name": "calinski_harabasz_score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A list of ``n_features``-dimensional data points. 
Each row corresponds to a single data point." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted labels for each sample." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Calinski and Harabasz score.\n\nIt is also known as the Variance Ratio Criterion.\n\nThe score is defined as ratio between the within-cluster dispersion and\nthe between-cluster dispersion.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n A list of ``n_features``-dimensional data points. Each row corresponds\n to a single data point.\n\nlabels : array-like of shape (n_samples,)\n Predicted labels for each sample.\n\nReturns\n-------\nscore : float\n The resulting Calinski-Harabasz score.\n\nReferences\n----------\n.. [1] `T. Calinski and J. Harabasz, 1974. \"A dendrite method for cluster\n analysis\". Communications in Statistics\n `_" - }, - { - "name": "davies_bouldin_score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A list of ``n_features``-dimensional data points. Each row corresponds to a single data point." - }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted labels for each sample." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Computes the Davies-Bouldin score.\n\nThe score is defined as the average similarity measure of each cluster with\nits most similar cluster, where similarity is the ratio of within-cluster\ndistances to between-cluster distances. 
Thus, clusters which are farther\napart and less dispersed will result in a better score.\n\nThe minimum score is zero, with lower values indicating better clustering.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n A list of ``n_features``-dimensional data points. Each row corresponds\n to a single data point.\n\nlabels : array-like of shape (n_samples,)\n Predicted labels for each sample.\n\nReturns\n-------\nscore: float\n The resulting Davies-Bouldin score.\n\nReferences\n----------\n.. [1] Davies, David L.; Bouldin, Donald W. (1979).\n `\"A Cluster Separation Measure\"\n `__.\n IEEE Transactions on Pattern Analysis and Machine Intelligence.\n PAMI-1 (2): 224-227" - } - ] - }, - { - "name": "sklearn.metrics.cluster", - "imports": [ - "from _supervised import adjusted_mutual_info_score", - "from _supervised import normalized_mutual_info_score", - "from _supervised import adjusted_rand_score", - "from _supervised import rand_score", - "from _supervised import completeness_score", - "from _supervised import contingency_matrix", - "from _supervised import pair_confusion_matrix", - "from _supervised import expected_mutual_information", - "from _supervised import homogeneity_completeness_v_measure", - "from _supervised import homogeneity_score", - "from _supervised import mutual_info_score", - "from _supervised import v_measure_score", - "from _supervised import fowlkes_mallows_score", - "from _supervised import entropy", - "from _unsupervised import silhouette_samples", - "from _unsupervised import silhouette_score", - "from _unsupervised import calinski_harabasz_score", - "from _unsupervised import davies_bouldin_score", - "from _bicluster import consensus_score" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.metrics.cluster.tests.test_bicluster", - "imports": [ - "import numpy as np", - "from sklearn.utils._testing import assert_almost_equal", - 
"from sklearn.metrics.cluster._bicluster import _jaccard", - "from sklearn.metrics import consensus_score" - ], - "classes": [], - "functions": [ - { - "name": "test_jaccard", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_consensus_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_consensus_score_issue2445", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Different number of biclusters in A and B" - } - ] - }, - { - "name": "sklearn.metrics.cluster.tests.test_common", - "imports": [ - "from functools import partial", - "import pytest", - "import numpy as np", - "from sklearn.metrics.cluster import adjusted_mutual_info_score", - "from sklearn.metrics.cluster import adjusted_rand_score", - "from sklearn.metrics.cluster import rand_score", - "from sklearn.metrics.cluster import completeness_score", - "from sklearn.metrics.cluster import fowlkes_mallows_score", - "from sklearn.metrics.cluster import homogeneity_score", - "from sklearn.metrics.cluster import mutual_info_score", - "from sklearn.metrics.cluster import normalized_mutual_info_score", - "from sklearn.metrics.cluster import v_measure_score", - "from sklearn.metrics.cluster import silhouette_score", - "from sklearn.metrics.cluster import calinski_harabasz_score", - "from sklearn.metrics.cluster import davies_bouldin_score", - "from sklearn.utils._testing import assert_allclose" - ], - "classes": [], - "functions": [ - { - "name": "test_symmetric_non_symmetric_union", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_symmetry", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_symmetry", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normalized_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permute_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_format_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_single_sample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inf_nan_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.cluster.tests.test_supervised", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.metrics.cluster import adjusted_mutual_info_score", - "from sklearn.metrics.cluster import adjusted_rand_score", - "from sklearn.metrics.cluster import rand_score", - "from sklearn.metrics.cluster import completeness_score", - "from sklearn.metrics.cluster import contingency_matrix", - "from sklearn.metrics.cluster import pair_confusion_matrix", - "from sklearn.metrics.cluster import entropy", - "from sklearn.metrics.cluster import expected_mutual_information", - "from sklearn.metrics.cluster import fowlkes_mallows_score", - "from sklearn.metrics.cluster import homogeneity_completeness_v_measure", - "from sklearn.metrics.cluster import homogeneity_score", - "from sklearn.metrics.cluster import mutual_info_score", - "from sklearn.metrics.cluster import normalized_mutual_info_score", - "from sklearn.metrics.cluster import v_measure_score", - "from sklearn.metrics.cluster._supervised import _generalized_average", - "from sklearn.metrics.cluster._supervised import check_clusterings", - "from 
sklearn.utils import assert_all_finite", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import ignore_warnings", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_allclose" - ], - "classes": [], - "functions": [ - { - "name": "test_error_messages_on_wrong_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_generalized_average", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_perfect_matches", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_homogeneous_but_not_complete_labeling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_complete_but_not_homogeneous_labeling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_not_complete_and_not_homogeneous_labeling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_beta_parameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_consecutive_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "uniform_labelings_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_adjustment_for_chance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_adjusted_mutual_info_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_expected_mutual_info_overflow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_int_overflow_mutual_info_fowlkes_mallows_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_entropy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_contingency_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_contingency_matrix_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_exactly_zero_info_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_v_measure_and_mutual_information", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fowlkes_mallows_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fowlkes_mallows_score_properties", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mutual_info_score_positive_constant_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_clustering_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_pair_confusion_matrix_fully_dispersed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pair_confusion_matrix_single_cluster", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pair_confusion_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rand_score_edge_cases", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rand_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.cluster.tests.test_unsupervised", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "import pytest", - "from scipy.sparse import csr_matrix", - "from sklearn import datasets", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.metrics.cluster import silhouette_score", - "from sklearn.metrics.cluster import silhouette_samples", - "from sklearn.metrics import pairwise_distances", - "from sklearn.metrics.cluster import calinski_harabasz_score", - "from sklearn.metrics.cluster import davies_bouldin_score" - ], - "classes": [], - "functions": [ - { - "name": "test_silhouette", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cluster_size_1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_silhouette_paper_example", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_correct_labelsize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_non_encoded_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_numpy_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_silhouette_nonzero_diag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_raises_on_only_one_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Assert message when there is only one label" - }, - { - "name": "assert_raises_on_all_points_same_cluster", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Assert message when all point are in different clusters" - }, - { - "name": "test_calinski_harabasz_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_davies_bouldin_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.cluster.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.metrics.tests.test_classification", - "imports": [ - "from functools import partial", - "from itertools import product", - "from itertools import chain", - "from itertools import permutations", - "import warnings", - "import numpy as np", - "from scipy import linalg", - "import pytest", - "from sklearn import datasets", - "from sklearn import svm", - "from sklearn.datasets import make_multilabel_classification", - "from sklearn.preprocessing import label_binarize", - "from sklearn.preprocessing import LabelBinarizer", - "from sklearn.utils.validation import check_random_state", - "from 
sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_warns_div0", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._mocking import MockDataFrame", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import average_precision_score", - "from sklearn.metrics import balanced_accuracy_score", - "from sklearn.metrics import classification_report", - "from sklearn.metrics import cohen_kappa_score", - "from sklearn.metrics import confusion_matrix", - "from sklearn.metrics import f1_score", - "from sklearn.metrics import fbeta_score", - "from sklearn.metrics import hamming_loss", - "from sklearn.metrics import hinge_loss", - "from sklearn.metrics import jaccard_score", - "from sklearn.metrics import log_loss", - "from sklearn.metrics import matthews_corrcoef", - "from sklearn.metrics import precision_recall_fscore_support", - "from sklearn.metrics import precision_score", - "from sklearn.metrics import recall_score", - "from sklearn.metrics import zero_one_loss", - "from sklearn.metrics import brier_score_loss", - "from sklearn.metrics import multilabel_confusion_matrix", - "from sklearn.metrics._classification import _check_targets", - "from sklearn.exceptions import UndefinedMetricWarning", - "from scipy.spatial.distance import hamming as sp_hamming", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from pandas import Series", - "from pandas import DataFrame" - ], - "classes": [], - "functions": [ - { - "name": "make_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make some 
classification predictions on a toy dataset using a SVC\n\nIf binary is True restrict to a binary classification problem instead of a\nmulticlass classification problem" - }, - { - "name": "test_classification_report_dictionary_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_output_dict_empty_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_zero_division_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_accuracy_score_subset_accuracy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_score_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f_binary_single_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f_extra_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f_ignored_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_average_precision_score_score_non_binary_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_average_precision_score_duplicate_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_average_precision_score_tied_values", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_fscore_support_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f_unused_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_confusion_matrix_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_confusion_matrix_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_confusion_matrix_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_confusion_matrix_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_normalize_wrong_option", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_normalize_single_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cohen_kappa", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_matthews_corrcoef_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_matthews_corrcoef_against_numpy_corrcoef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_matthews_corrcoef_against_jurman", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_matthews_corrcoef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_matthews_corrcoef_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_matthews_corrcoef_overflow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_score_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_refcall_f1_score_multilabel_unordered_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_score_binary_averaged", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_precision_recall", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_multiclass_subset_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_confusion_matrix_on_zero_length_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_multiclass_balanced", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_multiclass_with_label_detection", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_multiclass_with_digits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_multiclass_with_string_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_multiclass_with_unicode_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_multiclass_with_long_string_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_labels_target_names_unequal_length", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_report_no_labels_target_names_unequal_length", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_multilabel_classification_report", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_zero_one_loss_subset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_hamming_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_jaccard_score_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_jaccard_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_jaccard_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_average_binary_jaccard_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_jaccard_score_zero_division_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_jaccard_score_zero_division_set_value", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_score_multilabel_1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_score_multilabel_2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_score_with_an_empty_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_no_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_no_labels_check_warnings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_no_labels_average_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_f1_no_labels_average_none_warn", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prf_warnings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prf_no_warnings_if_zero_division_set", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_recall_warnings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_warnings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fscore_warnings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prf_average_binary_data_non_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test__check_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test__check_targets_multiclass_with_both_y_true_and_y_pred_binary", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hinge_loss_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hinge_loss_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hinge_loss_multiclass_missing_labels_with_labels_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hinge_loss_multiclass_with_missing_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hinge_loss_multiclass_missing_labels_only_two_unq_in_y_true", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hinge_loss_multiclass_invariance_lists", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_log_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_log_loss_pandas_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_brier_score_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_balanced_accuracy_score_unseen", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_balanced_accuracy_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.tests.test_common", - "imports": [ - "from functools 
import partial", - "from inspect import signature", - "from itertools import product", - "from itertools import chain", - "from itertools import permutations", - "import numpy as np", - "import scipy.sparse as sp", - "import pytest", - "from sklearn.datasets import make_multilabel_classification", - "from sklearn.preprocessing import LabelBinarizer", - "from sklearn.utils.multiclass import type_of_target", - "from sklearn.utils.validation import _num_samples", - "from sklearn.utils.validation import check_random_state", - "from sklearn.utils import shuffle", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_less", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import average_precision_score", - "from sklearn.metrics import balanced_accuracy_score", - "from sklearn.metrics import brier_score_loss", - "from sklearn.metrics import cohen_kappa_score", - "from sklearn.metrics import confusion_matrix", - "from sklearn.metrics import coverage_error", - "from sklearn.metrics import det_curve", - "from sklearn.metrics import explained_variance_score", - "from sklearn.metrics import f1_score", - "from sklearn.metrics import fbeta_score", - "from sklearn.metrics import hamming_loss", - "from sklearn.metrics import hinge_loss", - "from sklearn.metrics import jaccard_score", - "from sklearn.metrics import label_ranking_average_precision_score", - "from sklearn.metrics import label_ranking_loss", - "from sklearn.metrics import log_loss", - "from sklearn.metrics import max_error", - "from sklearn.metrics import matthews_corrcoef", - "from sklearn.metrics import mean_absolute_error", - "from sklearn.metrics import mean_absolute_percentage_error", - "from sklearn.metrics import mean_squared_error", - "from sklearn.metrics import 
mean_tweedie_deviance", - "from sklearn.metrics import mean_poisson_deviance", - "from sklearn.metrics import mean_gamma_deviance", - "from sklearn.metrics import median_absolute_error", - "from sklearn.metrics import multilabel_confusion_matrix", - "from sklearn.metrics import precision_recall_curve", - "from sklearn.metrics import precision_score", - "from sklearn.metrics import r2_score", - "from sklearn.metrics import recall_score", - "from sklearn.metrics import roc_auc_score", - "from sklearn.metrics import roc_curve", - "from sklearn.metrics import zero_one_loss", - "from sklearn.metrics import ndcg_score", - "from sklearn.metrics import dcg_score", - "from sklearn.metrics import top_k_accuracy_score", - "from sklearn.metrics._base import _average_binary_score" - ], - "classes": [], - "functions": [ - { - "name": "precision_recall_curve_padded_thresholds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The dimensions of precision-recall pairs and the threshold array as\nreturned by the precision_recall_curve do not match. See\nfunc:`sklearn.metrics.precision_recall_curve`\n\nThis prevents implicit conversion of return value triple to an higher\ndimensional np.array of dtype('float64') (it will be of dtype('object)\ninstead). This again is needed for assert_array_equal to work correctly.\n\nAs a workaround we pad the threshold array with NaN values to match\nthe dimension of precision and recall arrays respectively." 
- }, - { - "name": "_require_positive_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make targets strictly positive" - }, - { - "name": "test_symmetry_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_symmetric_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_not_symmetric_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_order_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_order_invariance_multilabel_and_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_format_invariance_with_1d_vectors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_invariance_string_vs_numbers_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_thresholded_invariance_string_vs_numbers_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_thresholded_inf_nan_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_inf_nan_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "check that classification metrics raise a message mentioning the\noccurrence of non-finite values in the target vectors." 
- }, - { - "name": "test_classification_binary_continuous_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "check that classification metrics raise a message of mixed type data\nwith continuous/binary target vectors." - }, - { - "name": "check_single_sample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_single_sample_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_single_sample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_single_sample_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multioutput_number_of_output_differ", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multioutput_regression_invariance_to_dimension_shuffling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_representation_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raise_value_error_multilabel_sequences", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normalize_option_binary_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normalize_option_multiclass_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_normalize_option_multilabel_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_averaging", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_averaging", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_averaging_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_averaging_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_averaging_multilabel_all_zeroes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_averaging_binary_multilabel_all_zeroes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_averaging_multilabel_all_ones", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sample_weight_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_sample_weight_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_binary_sample_weight_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_sample_weight_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_multilabel_sample_weight_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_averaging_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_label_permutations_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_thresholded_multilabel_multioutput_permutations_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_thresholded_metric_permutation_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_metrics_consistent_type_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_metrics_pos_label_error_str", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.tests.test_pairwise", - "imports": [ - "from types import GeneratorType", - "import numpy as np", - "from numpy import linalg", - "from scipy.sparse import dok_matrix", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import issparse", - "from scipy.spatial.distance import cosine", - "from scipy.spatial.distance import cityblock", - "from scipy.spatial.distance import minkowski", - "from scipy.spatial.distance import cdist", - "from scipy.spatial.distance import pdist", - "from scipy.spatial.distance import squareform", - "from scipy.spatial.distance import wminkowski", - "from scipy.spatial.distance import minkowski as wminkowski", - "from sklearn.utils.fixes import sp_version", - "from sklearn.utils.fixes import parse_version", - "import 
pytest", - "from sklearn import config_context", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.metrics.pairwise import euclidean_distances", - "from sklearn.metrics.pairwise import nan_euclidean_distances", - "from sklearn.metrics.pairwise import manhattan_distances", - "from sklearn.metrics.pairwise import haversine_distances", - "from sklearn.metrics.pairwise import linear_kernel", - "from sklearn.metrics.pairwise import chi2_kernel", - "from sklearn.metrics.pairwise import additive_chi2_kernel", - "from sklearn.metrics.pairwise import polynomial_kernel", - "from sklearn.metrics.pairwise import rbf_kernel", - "from sklearn.metrics.pairwise import laplacian_kernel", - "from sklearn.metrics.pairwise import sigmoid_kernel", - "from sklearn.metrics.pairwise import cosine_similarity", - "from sklearn.metrics.pairwise import cosine_distances", - "from sklearn.metrics.pairwise import pairwise_distances", - "from sklearn.metrics.pairwise import pairwise_distances_chunked", - "from sklearn.metrics.pairwise import pairwise_distances_argmin_min", - "from sklearn.metrics.pairwise import pairwise_distances_argmin", - "from sklearn.metrics.pairwise import pairwise_kernels", - "from sklearn.metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS", - "from sklearn.metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS", - "from sklearn.metrics.pairwise import PAIRWISE_BOOLEAN_FUNCTIONS", - "from sklearn.metrics.pairwise import PAIRED_DISTANCES", - "from sklearn.metrics.pairwise import check_pairwise_arrays", - "from sklearn.metrics.pairwise import check_paired_arrays", - "from sklearn.metrics.pairwise import paired_distances", - "from sklearn.metrics.pairwise import paired_euclidean_distances", - "from 
sklearn.metrics.pairwise import paired_manhattan_distances", - "from sklearn.metrics.pairwise import _euclidean_distances_upcast", - "from sklearn.preprocessing import normalize", - "from sklearn.exceptions import DataConversionWarning" - ], - "classes": [], - "functions": [ - { - "name": "test_pairwise_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_boolean_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_data_conversion_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_precomputed_non_negative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "callable_rbf_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_callable_nonstrict_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_kernels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_kernels_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_kernels_filter_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_paired_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_paired_distances_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_distances_argmin_min", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_reduce_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_distances_chunked_reduce", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_distances_chunked_reduce_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_distances_chunked_reduce_valid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_distances_chunked_reduce_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pairwise_distances_chunked", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_distances_chunked_diagonal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parallel_pairwise_distances_diagonal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_distances_chunked", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_euclidean_distances_known_result", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_euclidean_distances_with_norms", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_euclidean_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_euclidean_distances_sym", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_euclidean_distances_upcast", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_euclidean_distances_upcast_sym", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_euclidean_distances_extreme_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nan_euclidean_distances_equal_to_euclidean_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nan_euclidean_distances_infinite_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nan_euclidean_distances_2x2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nan_euclidean_distances_complete_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nan_euclidean_distances_not_trival", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nan_euclidean_distances_one_feature_match_positive", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cosine_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_haversine_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_paired_euclidean_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_paired_manhattan_distances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_chi_square_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_symmetry", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rbf_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_laplacian_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_similarity_sparse_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_cosine_similarity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_dense_matrices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_XB_returned", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_different_dimensions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_invalid_dimensions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_sparse_arrays", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "tuplify", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_tuple_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_preserve_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_distances_data_derived_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_numeric_pairwise_distances_datatypes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.tests.test_ranking", - "imports": [ - "import re", - "import pytest", - "import numpy as np", - "import warnings", - "from scipy.sparse import csr_matrix", - "from sklearn import datasets", - "from sklearn import svm", - "from 
sklearn.utils.extmath import softmax", - "from sklearn.datasets import make_multilabel_classification", - "from sklearn.random_projection import _sparse_random_matrix", - "from sklearn.utils.validation import check_array", - "from sklearn.utils.validation import check_consistent_length", - "from sklearn.utils.validation import check_random_state", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import auc", - "from sklearn.metrics import average_precision_score", - "from sklearn.metrics import coverage_error", - "from sklearn.metrics import det_curve", - "from sklearn.metrics import label_ranking_average_precision_score", - "from sklearn.metrics import precision_recall_curve", - "from sklearn.metrics import label_ranking_loss", - "from sklearn.metrics import roc_auc_score", - "from sklearn.metrics import roc_curve", - "from sklearn.metrics._ranking import _ndcg_sample_scores", - "from sklearn.metrics._ranking import _dcg_sample_scores", - "from sklearn.metrics import ndcg_score", - "from sklearn.metrics import dcg_score", - "from sklearn.metrics import top_k_accuracy_score", - "from sklearn.exceptions import UndefinedMetricWarning", - "from sklearn.model_selection import train_test_split", - "from sklearn.linear_model import LogisticRegression" - ], - "classes": [], - "functions": [ - { - "name": "make_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make some classification predictions on a toy dataset using a SVC\n\nIf binary is True restrict to a binary classification problem instead of a\nmulticlass classification problem" - }, - { - "name": "_auc", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Alternative implementation to check for correctness of\n`roc_auc_score`." - }, - { - "name": "_average_precision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Alternative implementation to check for correctness of\n`average_precision_score`.\n\nNote that this implementation fails on some edge cases.\nFor example, for constant predictions e.g. [0.5, 0.5, 0.5],\ny_true = [1, 0, 0] returns an average precision of 0.33...\nbut y_true = [0, 0, 1] returns 1.0." - }, - { - "name": "_average_precision_slow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "A second alternative implementation of average precision that closely\nfollows the Wikipedia article's definition (see References). This should\ngive identical results as `average_precision_score` for all inputs.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Average precision\n `_" - }, - { - "name": "_partial_roc_auc_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Alternative implementation to check for correctness of `roc_auc_score`\nwith `max_fpr` set." 
- }, - { - "name": "test_roc_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_end_points", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_returns_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_multi", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_confidence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_hard", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_one_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_toydata", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_drop_intermediate", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_fpr_tpr_increasing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_auc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_auc_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_ovo_roc_auc_toydata", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_multiclass_ovo_roc_auc_toydata_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_ovr_roc_auc_toydata", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_auc_score_multiclass_labels_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_auc_score_multiclass_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_auc_score_non_binary_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_binary_clf_curve_multiclass_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_binary_clf_curve_implicit_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_precision_recall_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_curve_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_curve_toydata", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_average_precision_constant_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "test_average_precision_score_pos_label_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_scale_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_det_curve_toydata", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_det_curve_tie_handling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_det_curve_sanity_check", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_det_curve_constant_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_det_curve_perfect_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_det_curve_bad_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_det_curve_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_lrap_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_zero_or_all_relevant_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_lrap_error_raised", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_lrap_only_ties", - "decorators": [], 
- "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_lrap_without_tie_and_increasing_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_my_lrap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Simple implementation of label ranking average precision" - }, - { - "name": "check_alternative_lrap_implementation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_ranking_avp", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lrap_error_raised", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_alternative_lrap_implementation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lrap_sample_weighting_zero_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_coverage_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_coverage_tie_handling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_ranking_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ranking_appropriate_input_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ranking_loss_ties_handling", - "decorators": [], - "parameters": [], 
- "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dcg_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_dcg_score_for", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dcg_ties", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ndcg_ignore_ties_with_k", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ndcg_invariant", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ndcg_toy_examples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ndcg_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_test_ndcg_score_for", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_roc_auc_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_top_k_accuracy_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_top_k_accuracy_score_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_top_k_accuracy_score_increasing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_top_k_accuracy_score_ties", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_top_k_accuracy_score_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_top_k_accuracy_score_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.tests.test_regression", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_allclose", - "from itertools import product", - "import pytest", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.metrics import explained_variance_score", - "from sklearn.metrics import mean_absolute_error", - "from sklearn.metrics import mean_squared_error", - "from sklearn.metrics import mean_squared_log_error", - "from sklearn.metrics import median_absolute_error", - "from sklearn.metrics import mean_absolute_percentage_error", - "from sklearn.metrics import max_error", - "from sklearn.metrics import r2_score", - "from sklearn.metrics import mean_tweedie_deviance", - "from sklearn.metrics._regression import _check_reg_targets", - "from exceptions import UndefinedMetricWarning" - ], - "classes": [], - "functions": [ - { - "name": "test_regression_metrics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_squared_error_multioutput_raw_value_squared", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multioutput_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_metrics_at_limits", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test__check_reg_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test__check_reg_targets_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_multioutput_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_custom_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_single_sample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tweedie_deviance_continuity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_absolute_percentage_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.tests.test_score_objects", - "imports": [ - "from copy import deepcopy", - "import pickle", - "import tempfile", - "import shutil", - "import os", - "import numbers", - "from unittest.mock import Mock", - "from functools import partial", - "import numpy as np", - "import pytest", - "import joblib", - "from numpy.testing import assert_allclose", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.base import BaseEstimator", - "from sklearn.metrics import average_precision_score", - "from sklearn.metrics import brier_score_loss", - "from sklearn.metrics import 
f1_score", - "from sklearn.metrics import fbeta_score", - "from sklearn.metrics import jaccard_score", - "from sklearn.metrics import log_loss", - "from sklearn.metrics import precision_score", - "from sklearn.metrics import r2_score", - "from sklearn.metrics import recall_score", - "from sklearn.metrics import roc_auc_score", - "from sklearn.metrics import cluster as cluster_module", - "from sklearn.metrics import check_scoring", - "from sklearn.metrics._scorer import _PredictScorer", - "from sklearn.metrics._scorer import _passthrough_scorer", - "from sklearn.metrics._scorer import _MultimetricScorer", - "from sklearn.metrics._scorer import _check_multimetric_scoring", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import make_scorer", - "from sklearn.metrics import get_scorer", - "from sklearn.metrics import SCORERS", - "from sklearn.neighbors import KNeighborsClassifier", - "from sklearn.svm import LinearSVC", - "from sklearn.pipeline import make_pipeline", - "from sklearn.cluster import KMeans", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import Perceptron", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.datasets import make_blobs", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_regression", - "from sklearn.datasets import make_multilabel_classification", - "from sklearn.datasets import load_diabetes", - "from sklearn.model_selection import train_test_split", - "from sklearn.model_selection import cross_val_score", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.multiclass import OneVsRestClassifier", - "from sklearn.datasets import load_breast_cancer", - "from sklearn.utils import shuffle" - ], - "classes": [ - { - "name": "EstimatorWithoutFit", - "decorators": [], - "superclasses": [], - "methods": [ - { - 
"name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Dummy estimator to test scoring validators" - }, - { - "name": "EstimatorWithFit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy estimator to test scoring validators" - }, - { - "name": "EstimatorWithFitAndScore", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy estimator to test scoring validators" - }, - { - "name": "EstimatorWithFitAndPredict", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy estimator to test scoring validators" - }, - { - "name": "DummyScorer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": "" - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy scorer that always returns 1." - } - ], - "functions": [ - { - "name": "_require_positive_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make targets strictly positive" - }, - { - "name": "_make_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "setup_module", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "teardown_module", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_all_scorers_repr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_scoring_validator_for_single_metric_usecases", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_scoring_and_check_multimetric_scoring", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_scoring_and_check_multimetric_scoring_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_scoring_gridsearchcv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_scorers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_thresholded_scorers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_thresholded_scorers_multilabel_indicator_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_supervised_cluster_scorers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises_on_score_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_scorer_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_scorer_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scorer_memmap_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scoring_is_not_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multimetric_scorer_calls_method_once", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multimetric_scorer_calls_method_once_classifier_no_decision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multimetric_scorer_calls_method_once_regressor_threshold", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multimetric_scorer_sanity_check", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_roc_proba_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_roc_proba_scorer_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_roc_no_proba_scorer_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "string_labeled_classification_problem", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Train a classifier on binary problem with string target.\n\nThe classifier is trained on a binary classification problem where the\nminority class of interest has a string label that is intentionally not the\ngreatest class label using the lexicographic order. 
In this case, \"cancer\"\nis the positive label, and `classifier.classes_` is\n`[\"cancer\", \"not cancer\"]`.\n\nIn addition, the dataset is imbalanced to better identify problems when\nusing non-symmetric performance metrics such as f1-score, average precision\nand so on.\n\nReturns\n-------\nclassifier : estimator object\n Trained classifier on the binary problem.\nX_test : ndarray of shape (n_samples, n_features)\n Data to be used as testing set in tests.\ny_test : ndarray of shape (n_samples,), dtype=object\n Binary target where labels are strings.\ny_pred : ndarray of shape (n_samples,), dtype=object\n Prediction of `classifier` when predicting for `X_test`.\ny_pred_proba : ndarray of shape (n_samples, 2), dtype=np.float64\n Probabilities of `classifier` when predicting for `X_test`.\ny_pred_decision : ndarray of shape (n_samples,), dtype=np.float64\n Decision function values of `classifier` when predicting on `X_test`." - }, - { - "name": "test_average_precision_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_brier_score_loss_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_non_symmetric_metric_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scorer_select_proba_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scorer_no_op_multiclass_select_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.metrics._plot.base", - "imports": [ - "import numpy as np", - "from sklearn.base import 
is_classifier" - ], - "classes": [], - "functions": [ - { - "name": "_check_classifier_response_method", - "decorators": [], - "parameters": [ - { - "name": "estimator: object", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Classifier to check" - }, - { - "name": "response_method: {'auto'", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." - }, - { - "name": "'predict_proba'", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." - }, - { - "name": "'decision_function'}", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Return prediction method from the response_method\n\nParameters\n----------\nestimator: object\n Classifier to check\n\nresponse_method: {'auto', 'predict_proba', 'decision_function'}\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. 
If set to 'auto',\n :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\nReturns\n-------\nprediction_method: callable\n prediction method of estimator" - }, - { - "name": "_get_response", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input values." - }, - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline` in which the last estimator is a classifier." - }, - { - "name": "response_method: {'auto'", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." - }, - { - "name": "'predict_proba'", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." - }, - { - "name": "'decision_function'}", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." 
- }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class considered as the positive class when computing the metrics. By default, `estimators.classes_[1]` is considered as the positive class." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return response and positive label.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\nestimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\nresponse_method: {'auto', 'predict_proba', 'decision_function'}\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. If set to 'auto',\n :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\npos_label : str or int, default=None\n The class considered as the positive class when computing\n the metrics. By default, `estimators.classes_[1]` is\n considered as the positive class.\n\nReturns\n-------\ny_pred: ndarray of shape (n_samples,)\n Target scores calculated from the provided response_method\n and pos_label.\n\npos_label: str or int\n The class considered as the positive class when computing\n the metrics." 
- } - ] - }, - { - "name": "sklearn.metrics._plot.confusion_matrix", - "imports": [ - "from itertools import product", - "import numpy as np", - "from None import confusion_matrix", - "from utils import check_matplotlib_support", - "from utils.multiclass import unique_labels", - "from utils.validation import _deprecate_positional_args", - "from base import is_classifier", - "import matplotlib.pyplot as plt" - ], - "classes": [ - { - "name": "ConfusionMatrixDisplay", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "confusion_matrix", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Confusion matrix." - }, - { - "name": "display_labels", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Display labels for plot. If None, display labels are set from 0 to `n_classes - 1`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "plot", - "decorators": [], - "parameters": [ - { - "name": "include_values", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Includes values in confusion matrix." - }, - { - "name": "cmap", - "type": "str", - "hasDefault": true, - "default": "'viridis'", - "limitation": null, - "ignored": false, - "docstring": "Colormap recognized by matplotlib." - }, - { - "name": "xticks_rotation", - "type": "Literal['vertical', 'horizontal']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Rotation of xtick labels." - }, - { - "name": "values_format", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Format specification for values in confusion matrix. 
If `None`, the format specification is 'd' or '.2g' whichever is shorter." - }, - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes object to plot on. If `None`, a new figure and axes is created." - }, - { - "name": "colorbar", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to add a colorbar to the plot." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot visualization.\n\nParameters\n----------\ninclude_values : bool, default=True\n Includes values in confusion matrix.\n\ncmap : str or matplotlib Colormap, default='viridis'\n Colormap recognized by matplotlib.\n\nxticks_rotation : {'vertical', 'horizontal'} or float, default='horizontal'\n Rotation of xtick labels.\n\nvalues_format : str, default=None\n Format specification for values in confusion matrix. If `None`,\n the format specification is 'd' or '.2g' whichever is shorter.\n\nax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\ncolorbar : bool, default=True\n Whether or not to add a colorbar to the plot.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.ConfusionMatrixDisplay`" - } - ], - "docstring": "Confusion Matrix visualization.\n\nIt is recommend to use :func:`~sklearn.metrics.plot_confusion_matrix` to\ncreate a :class:`ConfusionMatrixDisplay`. All parameters are stored as\nattributes.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nconfusion_matrix : ndarray of shape (n_classes, n_classes)\n Confusion matrix.\n\ndisplay_labels : ndarray of shape (n_classes,), default=None\n Display labels for plot. 
If None, display labels are set from 0 to\n `n_classes - 1`.\n\nAttributes\n----------\nim_ : matplotlib AxesImage\n Image representing the confusion matrix.\n\ntext_ : ndarray of shape (n_classes, n_classes), dtype=matplotlib Text, or None\n Array of matplotlib axes. `None` if `include_values` is false.\n\nax_ : matplotlib Axes\n Axes with confusion matrix.\n\nfigure_ : matplotlib Figure\n Figure containing the confusion matrix.\n\nSee Also\n--------\nconfusion_matrix : Compute Confusion Matrix to evaluate the accuracy of a\n classification.\nplot_confusion_matrix : Plot Confusion Matrix.\n\nExamples\n--------\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n... random_state=0)\n>>> clf = SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> predictions = clf.predict(X_test)\n>>> cm = confusion_matrix(y_test, predictions, labels=clf.classes_)\n>>> disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n... display_labels=clf.classes_)\n>>> disp.plot() # doctest: +SKIP" - } - ], - "functions": [ - { - "name": "plot_confusion_matrix", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline` in which the last estimator is a classifier." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input values." - }, - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." 
- }, - { - "name": "labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of labels to index the matrix. This may be used to reorder or select a subset of labels. If `None` is given, those that appear at least once in `y_true` or `y_pred` are used in sorted order." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "normalize", - "type": "Literal['true', 'pred', 'all']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Normalizes confusion matrix over the true (rows), predicted (columns) conditions or all the population. If None, confusion matrix will not be normalized." - }, - { - "name": "display_labels", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target names used for plotting. By default, `labels` will be used if it is defined, otherwise the unique labels of `y_true` and `y_pred` will be used." - }, - { - "name": "include_values", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Includes values in confusion matrix." - }, - { - "name": "xticks_rotation", - "type": "Literal['vertical', 'horizontal']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Rotation of xtick labels." - }, - { - "name": "values_format", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Format specification for values in confusion matrix. If `None`, the format specification is 'd' or '.2g' whichever is shorter." 
- }, - { - "name": "cmap", - "type": "str", - "hasDefault": true, - "default": "'viridis'", - "limitation": null, - "ignored": false, - "docstring": "Colormap recognized by matplotlib." - }, - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes object to plot on. If `None`, a new figure and axes is created." - }, - { - "name": "colorbar", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to add a colorbar to the plot. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot Confusion Matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\ny_true : array-like of shape (n_samples,)\n Target values.\n\nlabels : array-like of shape (n_classes,), default=None\n List of labels to index the matrix. This may be used to reorder or\n select a subset of labels. If `None` is given, those that appear at\n least once in `y_true` or `y_pred` are used in sorted order.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nnormalize : {'true', 'pred', 'all'}, default=None\n Normalizes confusion matrix over the true (rows), predicted (columns)\n conditions or all the population. If None, confusion matrix will not be\n normalized.\n\ndisplay_labels : array-like of shape (n_classes,), default=None\n Target names used for plotting. 
By default, `labels` will be used if\n it is defined, otherwise the unique labels of `y_true` and `y_pred`\n will be used.\n\ninclude_values : bool, default=True\n Includes values in confusion matrix.\n\nxticks_rotation : {'vertical', 'horizontal'} or float, default='horizontal'\n Rotation of xtick labels.\n\nvalues_format : str, default=None\n Format specification for values in confusion matrix. If `None`,\n the format specification is 'd' or '.2g' whichever is shorter.\n\ncmap : str or matplotlib Colormap, default='viridis'\n Colormap recognized by matplotlib.\n\nax : matplotlib Axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\ncolorbar : bool, default=True\n Whether or not to add a colorbar to the plot.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\nSee Also\n--------\nconfusion_matrix : Compute Confusion Matrix to evaluate the accuracy of a\n classification.\nConfusionMatrixDisplay : Confusion Matrix visualization.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt # doctest: +SKIP\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import plot_confusion_matrix\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... 
X, y, random_state=0)\n>>> clf = SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> plot_confusion_matrix(clf, X_test, y_test) # doctest: +SKIP\n>>> plt.show() # doctest: +SKIP" - } - ] - }, - { - "name": "sklearn.metrics._plot.det_curve", - "imports": [ - "import scipy as sp", - "from base import _get_response", - "from None import det_curve", - "from utils import check_matplotlib_support", - "import matplotlib.pyplot as plt" - ], - "classes": [ - { - "name": "DetCurveDisplay", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "fpr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "False positive rate." - }, - { - "name": "tpr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True positive rate." - }, - { - "name": "estimator_name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of estimator. If None, the estimator name is not shown." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The label of the positive class." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "plot", - "decorators": [], - "parameters": [ - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes object to plot on. If `None`, a new figure and axes is created." - }, - { - "name": "name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of DET curve for labeling. If `None`, use the name of the estimator." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot visualization.\n\nParameters\n----------\nax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\nname : str, default=None\n Name of DET curve for labeling. If `None`, use the name of the\n estimator.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.plot.DetCurveDisplay`\n Object that stores computed values." - } - ], - "docstring": "DET curve visualization.\n\nIt is recommend to use :func:`~sklearn.metrics.plot_det_curve` to create a\nvisualizer. All parameters are stored as attributes.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nfpr : ndarray\n False positive rate.\n\ntpr : ndarray\n True positive rate.\n\nestimator_name : str, default=None\n Name of estimator. If None, the estimator name is not shown.\n\npos_label : str or int, default=None\n The label of the positive class.\n\nAttributes\n----------\nline_ : matplotlib Artist\n DET Curve.\n\nax_ : matplotlib Axes\n Axes with DET Curve.\n\nfigure_ : matplotlib Figure\n Figure containing the curve.\n\nSee Also\n--------\ndet_curve : Compute error rates for different probability thresholds.\nplot_det_curve : Plot detection error tradeoff (DET) curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt # doctest: +SKIP\n>>> import numpy as np\n>>> from sklearn import metrics\n>>> y = np.array([0, 0, 1, 1])\n>>> pred = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, fnr, thresholds = metrics.det_curve(y, pred)\n>>> display = metrics.DetCurveDisplay(\n... fpr=fpr, fnr=fnr, estimator_name='example estimator'\n... 
)\n>>> display.plot() # doctest: +SKIP\n>>> plt.show() # doctest: +SKIP" - } - ], - "functions": [ - { - "name": "plot_det_curve", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline` in which the last estimator is a classifier." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input values." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "response_method", - "type": "Literal['predict_proba', 'decision_function', 'auto']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the predicted target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." - }, - { - "name": "name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of DET curve for labeling. If `None`, use the name of the estimator." - }, - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes object to plot on. If `None`, a new figure and axes is created." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The label of the positive class. 
When `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1}, `pos_label` is set to 1, otherwise an error will be raised." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot detection error tradeoff (DET) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nestimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'} default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the predicted target response. If set to\n 'auto', :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\nname : str, default=None\n Name of DET curve for labeling. If `None`, use the name of the\n estimator.\n\nax : matplotlib axes, default=None\n Axes object to plot on. 
If `None`, a new figure and axes is created.\n\npos_label : str or int, default=None\n The label of the positive class.\n When `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1},\n `pos_label` is set to 1, otherwise an error will be raised.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.DetCurveDisplay`\n Object that stores computed values.\n\nSee Also\n--------\ndet_curve : Compute error rates for different probability thresholds.\nDetCurveDisplay : DET curve visualization.\nplot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt # doctest: +SKIP\n>>> from sklearn import datasets, metrics, model_selection, svm\n>>> X, y = datasets.make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = model_selection.train_test_split(\n... X, y, random_state=0)\n>>> clf = svm.SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> metrics.plot_det_curve(clf, X_test, y_test) # doctest: +SKIP\n>>> plt.show() # doctest: +SKIP" - } - ] - }, - { - "name": "sklearn.metrics._plot.precision_recall_curve", - "imports": [ - "from base import _get_response", - "from None import average_precision_score", - "from None import precision_recall_curve", - "from utils import check_matplotlib_support", - "from utils.validation import _deprecate_positional_args", - "import matplotlib.pyplot as plt" - ], - "classes": [ - { - "name": "PrecisionRecallDisplay", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "precision", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Precision values." - }, - { - "name": "recall", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Recall values." 
- }, - { - "name": "average_precision", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Average precision. If None, the average precision is not shown." - }, - { - "name": "estimator_name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of estimator. If None, then the estimator name is not shown." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class considered as the positive class. If None, the class will not be shown in the legend. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "plot", - "decorators": [], - "parameters": [ - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes object to plot on. If `None`, a new figure and axes is created." - }, - { - "name": "name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of precision recall curve for labeling. If `None`, use the name of the estimator." - }, - { - "name": "**kwargs", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Keyword arguments to be passed to matplotlib's `plot`." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot visualization.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nParameters\n----------\nax : Matplotlib Axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\nname : str, default=None\n Name of precision recall curve for labeling. 
If `None`, use the\n name of the estimator.\n\n**kwargs : dict\n Keyword arguments to be passed to matplotlib's `plot`.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.PrecisionRecallDisplay`\n Object that stores computed values." - } - ], - "docstring": "Precision Recall visualization.\n\nIt is recommend to use :func:`~sklearn.metrics.plot_precision_recall_curve`\nto create a visualizer. All parameters are stored as attributes.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n-----------\nprecision : ndarray\n Precision values.\n\nrecall : ndarray\n Recall values.\n\naverage_precision : float, default=None\n Average precision. If None, the average precision is not shown.\n\nestimator_name : str, default=None\n Name of estimator. If None, then the estimator name is not shown.\n\npos_label : str or int, default=None\n The class considered as the positive class. If None, the class will not\n be shown in the legend.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nline_ : matplotlib Artist\n Precision recall curve.\n\nax_ : matplotlib Axes\n Axes with precision recall curve.\n\nfigure_ : matplotlib Figure\n Figure containing the curve.\n\nSee Also\n--------\nprecision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\nplot_precision_recall_curve : Plot Precision Recall Curve for binary\n classifiers.\n\nExamples\n--------\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import (precision_recall_curve,\n... PrecisionRecallDisplay)\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n... 
random_state=0)\n>>> clf = SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> predictions = clf.predict(X_test)\n>>> precision, recall, _ = precision_recall_curve(y_test, predictions)\n>>> disp = PrecisionRecallDisplay(precision=precision, recall=recall)\n>>> disp.plot() # doctest: +SKIP" - } - ], - "functions": [ - { - "name": "plot_precision_recall_curve", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline` in which the last estimator is a classifier." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input values." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Binary target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - }, - { - "name": "response_method", - "type": "Literal['predict_proba', 'decision_function', 'auto']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." - }, - { - "name": "name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name for labeling curve. If `None`, the name of the estimator is used." 
- }, - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes object to plot on. If `None`, a new figure and axes is created." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class considered as the positive class when computing the precision and recall metrics. By default, `estimators.classes_[1]` is considered as the positive class. .. versionadded:: 0.24" - }, - { - "name": "**kwargs", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Keyword arguments to be passed to matplotlib's `plot`." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot Precision Recall Curve for binary classifiers.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\ny : array-like of shape (n_samples,)\n Binary target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'}, default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. If set to 'auto',\n :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\nname : str, default=None\n Name for labeling curve. If `None`, the name of the\n estimator is used.\n\nax : matplotlib axes, default=None\n Axes object to plot on. 
If `None`, a new figure and axes is created.\n\npos_label : str or int, default=None\n The class considered as the positive class when computing the precision\n and recall metrics. By default, `estimators.classes_[1]` is considered\n as the positive class.\n\n .. versionadded:: 0.24\n\n**kwargs : dict\n Keyword arguments to be passed to matplotlib's `plot`.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.PrecisionRecallDisplay`\n Object that stores computed values.\n\nSee Also\n--------\nprecision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\nPrecisionRecallDisplay : Precision Recall visualization." - } - ] - }, - { - "name": "sklearn.metrics._plot.roc_curve", - "imports": [ - "from base import _get_response", - "from None import auc", - "from None import roc_curve", - "from utils import check_matplotlib_support", - "from utils.validation import _deprecate_positional_args", - "import matplotlib.pyplot as plt" - ], - "classes": [ - { - "name": "RocCurveDisplay", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "fpr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "False positive rate." - }, - { - "name": "tpr", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "True positive rate." - }, - { - "name": "roc_auc", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Area under ROC curve. If None, the roc_auc score is not shown." - }, - { - "name": "estimator_name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of estimator. If None, the estimator name is not shown." 
- }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class considered as the positive class when computing the roc auc metrics. By default, `estimators.classes_[1]` is considered as the positive class. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "plot", - "decorators": [], - "parameters": [ - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes object to plot on. If `None`, a new figure and axes is created." - }, - { - "name": "name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of ROC Curve for labeling. If `None`, use the name of the estimator." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot visualization\n\nExtra keyword arguments will be passed to matplotlib's ``plot``.\n\nParameters\n----------\nax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\nname : str, default=None\n Name of ROC Curve for labeling. If `None`, use the name of the\n estimator.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.plot.RocCurveDisplay`\n Object that stores computed values." - } - ], - "docstring": "ROC Curve visualization.\n\nIt is recommend to use :func:`~sklearn.metrics.plot_roc_curve` to create a\nvisualizer. All parameters are stored as attributes.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfpr : ndarray\n False positive rate.\n\ntpr : ndarray\n True positive rate.\n\nroc_auc : float, default=None\n Area under ROC curve. If None, the roc_auc score is not shown.\n\nestimator_name : str, default=None\n Name of estimator. 
If None, the estimator name is not shown.\n\npos_label : str or int, default=None\n The class considered as the positive class when computing the roc auc\n metrics. By default, `estimators.classes_[1]` is considered\n as the positive class.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nline_ : matplotlib Artist\n ROC Curve.\n\nax_ : matplotlib Axes\n Axes with ROC Curve.\n\nfigure_ : matplotlib Figure\n Figure containing the curve.\n\nSee Also\n--------\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nplot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\nroc_auc_score : Compute the area under the ROC curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt # doctest: +SKIP\n>>> import numpy as np\n>>> from sklearn import metrics\n>>> y = np.array([0, 0, 1, 1])\n>>> pred = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, tpr, thresholds = metrics.roc_curve(y, pred)\n>>> roc_auc = metrics.auc(fpr, tpr)\n>>> display = metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name='example estimator')\n>>> display.plot() # doctest: +SKIP\n>>> plt.show() # doctest: +SKIP" - } - ], - "functions": [ - { - "name": "plot_roc_curve", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline` in which the last estimator is a classifier." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input values." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." 
- }, - { - "name": "drop_intermediate", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to drop some suboptimal thresholds which would not appear on a plotted ROC curve. This is useful in order to create lighter ROC curves." - }, - { - "name": "response_method", - "type": "Literal['predict_proba', 'decision_function', 'auto']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies whether to use :term:`predict_proba` or :term:`decision_function` as the target response. If set to 'auto', :term:`predict_proba` is tried first and if it does not exist :term:`decision_function` is tried next." - }, - { - "name": "name", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of ROC Curve for labeling. If `None`, use the name of the estimator." - }, - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes object to plot on. If `None`, a new figure and axes is created." - }, - { - "name": "pos_label", - "type": "Union[int, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class considered as the positive class when computing the roc auc metrics. By default, `estimators.classes_[1]` is considered as the positive class. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Plot Receiver operating characteristic (ROC) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\ndrop_intermediate : boolean, default=True\n Whether to drop some suboptimal thresholds which would not appear\n on a plotted ROC curve. This is useful in order to create lighter\n ROC curves.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'} default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. If set to 'auto',\n :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\nname : str, default=None\n Name of ROC Curve for labeling. If `None`, use the name of the\n estimator.\n\nax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is created.\n\npos_label : str or int, default=None\n The class considered as the positive class when computing the roc auc\n metrics. By default, `estimators.classes_[1]` is considered\n as the positive class.\n\n .. 
versionadded:: 0.24\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.RocCurveDisplay`\n Object that stores computed values.\n\nSee Also\n--------\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nRocCurveDisplay : ROC Curve visualization.\nroc_auc_score : Compute the area under the ROC curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt # doctest: +SKIP\n>>> from sklearn import datasets, metrics, model_selection, svm\n>>> X, y = datasets.make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = model_selection.train_test_split(\n... X, y, random_state=0)\n>>> clf = svm.SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> metrics.plot_roc_curve(clf, X_test, y_test) # doctest: +SKIP\n>>> plt.show() # doctest: +SKIP" - } - ] - }, - { - "name": "sklearn.metrics._plot", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.metrics._plot.tests.test_plot_confusion_matrix", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_allclose", - "from numpy.testing import assert_array_equal", - "from sklearn.compose import make_column_transformer", - "from sklearn.datasets import make_classification", - "from sklearn.exceptions import NotFittedError", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.pipeline import make_pipeline", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.svm import SVC", - "from sklearn.svm import SVR", - "from sklearn.metrics import confusion_matrix", - "from sklearn.metrics import plot_confusion_matrix", - "from sklearn.metrics import ConfusionMatrixDisplay", - "import matplotlib as mpl" - ], - "classes": [], - "functions": [ - { - "name": "n_classes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "fitted_clf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "y_pred", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_error_on_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_error_on_invalid_option", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_confusion_matrix_custom_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_confusion_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_display", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_contrast", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_confusion_matrix_colorbar", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_text_format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_confusion_matrix_standard_format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_default_labels", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_error_on_a_dataset_with_unseen_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that when labels=None, the unique values in `y_pred` and `y_true`\nwill be used.\nNon-regression test for:\nhttps://github.com/scikit-learn/scikit-learn/pull/18405" - } - ] - }, - { - "name": "sklearn.metrics._plot.tests.test_plot_curve_common", - "imports": [ - "import pytest", - "from sklearn.base import ClassifierMixin", - "from sklearn.base import clone", - "from sklearn.compose import make_column_transformer", - "from sklearn.datasets import load_iris", - "from sklearn.exceptions import NotFittedError", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.pipeline import make_pipeline", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.metrics import plot_det_curve", - "from sklearn.metrics import plot_roc_curve" - ], - "classes": [], - "functions": [ - { - "name": "data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "data_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_curve_error_non_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_curve_error_no_response", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_curve_estimator_name_multiple_calls", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_det_curve_not_fitted_errors", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics._plot.tests.test_plot_det_curve", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_allclose", - "from sklearn.datasets import load_iris", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.metrics import det_curve", - "from sklearn.metrics import plot_det_curve", - "import matplotlib as mpl" - ], - "classes": [], - "functions": [ - { - "name": "data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "data_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_det_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics._plot.tests.test_plot_precision_recall", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_allclose", - "from sklearn.base import BaseEstimator", - "from sklearn.base import ClassifierMixin", - "from sklearn.metrics import plot_precision_recall_curve", - "from sklearn.metrics import PrecisionRecallDisplay", - "from sklearn.metrics import average_precision_score", - "from sklearn.metrics import precision_recall_curve", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import load_breast_cancer", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.model_selection import train_test_split", - "from sklearn.exceptions import NotFittedError", - "from sklearn.pipeline import make_pipeline", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.utils import shuffle", - "from 
sklearn.compose import make_column_transformer", - "import matplotlib as mpl" - ], - "classes": [], - "functions": [ - { - "name": "test_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_error_bad_response", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_precision_recall", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_curve_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision_recall_curve_string_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_precision_recall_curve_estimator_name_multiple_calls", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_default_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_precision_recall_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics._plot.tests.test_plot_roc_curve", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_allclose", - "from sklearn.metrics import plot_roc_curve", - "from sklearn.metrics import RocCurveDisplay", - "from sklearn.metrics import roc_curve", - "from sklearn.metrics import auc", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import load_breast_cancer", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.model_selection import train_test_split", - 
"from sklearn.exceptions import NotFittedError", - "from sklearn.pipeline import make_pipeline", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.utils import shuffle", - "from sklearn.compose import make_column_transformer", - "import matplotlib as mpl" - ], - "classes": [], - "functions": [ - { - "name": "data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "data_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_roc_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_roc_curve_not_fitted_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_default_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_roc_curve_pos_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.metrics._plot.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.mixture._base", - "imports": [ - "import warnings", - "from abc import ABCMeta", - "from abc import abstractmethod", - "from time import time", - "import numpy as np", - "from scipy.special import logsumexp", - "from None import cluster", - "from base import BaseEstimator", - "from base import DensityMixin", - "from exceptions import ConvergenceWarning", - "from utils import check_array", - "from utils import check_random_state", - "from utils.validation import check_is_fitted" - ], - "classes": [ - { - "name": "BaseMixture", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_initial_parameters", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check values of the basic parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)" - }, - { - "name": "_check_parameters", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check initial parameters of the derived class.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)" - }, - { - "name": "_initialize_parameters", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "random_state", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A random number generator instance that controls the random seed used for the method chosen to initialize the parameters." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialize the model parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nrandom_state : RandomState\n A random number generator instance that controls the random seed\n used for the method chosen to initialize the parameters." 
- }, - { - "name": "_initialize", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialize the model parameters of the derived class.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nresp : array-like of shape (n_samples, n_components)" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of n_features-dimensional data points. Each row corresponds to a single data point." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Estimate model parameters with the EM algorithm.\n\nThe method fits the model ``n_init`` times and sets the parameters with\nwhich the model has the largest likelihood or lower bound. Within each\ntrial, the method iterates between E-step and M-step for ``max_iter``\ntimes until the change of likelihood or lower bound is less than\n``tol``, otherwise, a ``ConvergenceWarning`` is raised.\nIf ``warm_start`` is ``True``, then ``n_init`` is ignored and a single\ninitialization is performed upon the first call. Upon consecutive\ncalls, training starts where it left off.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. 
Each row\n corresponds to a single data point.\n\nReturns\n-------\nself" - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of n_features-dimensional data points. Each row corresponds to a single data point." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate model parameters using X and predict the labels for X.\n\nThe method fits the model n_init times and sets the parameters with\nwhich the model has the largest likelihood or lower bound. Within each\ntrial, the method iterates between E-step and M-step for `max_iter`\ntimes until the change of likelihood or lower bound is less than\n`tol`, otherwise, a :class:`~sklearn.exceptions.ConvergenceWarning` is\nraised. After fitting, it predicts the most probable label for the\ninput data points.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\nReturns\n-------\nlabels : array, shape (n_samples,)\n Component labels." - }, - { - "name": "_e_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "E step.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nlog_prob_norm : float\n Mean of the logarithms of the probabilities of each sample in X\n\nlog_responsibility : array, shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X." 
- }, - { - "name": "_m_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "log_resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Logarithm of the posterior probabilities (or responsibilities) of the point of each sample in X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "M step.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nlog_resp : array-like of shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X." - }, - { - "name": "_get_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of n_features-dimensional data points. Each row corresponds to a single data point." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the weighted log probabilities for each sample.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\nReturns\n-------\nlog_prob : array, shape (n_samples,)\n Log probabilities of each data point in X." 
- }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of n_features-dimensional data points. Each row corresponds to a single data point." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the per-sample average log-likelihood of the given data X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_dimensions)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\nReturns\n-------\nlog_likelihood : float\n Log likelihood of the Gaussian mixture given X." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of n_features-dimensional data points. Each row corresponds to a single data point." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the labels for the data samples in X using trained model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\nReturns\n-------\nlabels : array, shape (n_samples,)\n Component labels." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of n_features-dimensional data points. Each row corresponds to a single data point." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict posterior probability of each component given the data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. 
Each row\n corresponds to a single data point.\n\nReturns\n-------\nresp : array, shape (n_samples, n_components)\n Returns the probability each Gaussian (state) in\n the model given each sample." - }, - { - "name": "sample", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to generate." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate random samples from the fitted Gaussian distribution.\n\nParameters\n----------\nn_samples : int, default=1\n Number of samples to generate.\n\nReturns\n-------\nX : array, shape (n_samples, n_features)\n Randomly generated sample\n\ny : array, shape (nsamples,)\n Component labels" - }, - { - "name": "_estimate_weighted_log_prob", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the weighted log-probabilities, log P(X | Z) + log weights.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nweighted_log_prob : array, shape (n_samples, n_component)" - }, - { - "name": "_estimate_log_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate log-weights in EM algorithm, E[ log pi ] in VB algorithm.\n\nReturns\n-------\nlog_weight : array, shape (n_components, )" - }, - { - "name": "_estimate_log_prob", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the log-probabilities log P(X | Z).\n\nCompute the log-probabilities per 
each component for each sample.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nlog_prob : array, shape (n_samples, n_component)" - }, - { - "name": "_estimate_log_prob_resp", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate log probabilities and responsibilities for each sample.\n\nCompute the log probabilities, weighted log probabilities per\ncomponent and responsibilities for each sample in X with respect to\nthe current state of the model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nlog_prob_norm : array, shape (n_samples,)\n log p(X)\n\nlog_responsibilities : array, shape (n_samples, n_components)\n logarithm of the responsibilities" - }, - { - "name": "_print_verbose_msg_init_beg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Print verbose message on initialization." - }, - { - "name": "_print_verbose_msg_iter_end", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Print verbose message on initialization." - }, - { - "name": "_print_verbose_msg_init_end", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Print verbose message on the end of iteration." - } - ], - "docstring": "Base class for mixture models.\n\nThis abstract class specifies an interface for all mixture classes and\nprovides basic common methods for mixture models." 
- } - ], - "functions": [ - { - "name": "_check_shape", - "decorators": [], - "parameters": [ - { - "name": "param", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "param_shape", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate the shape of the input parameter 'param'.\n\nParameters\n----------\nparam : array\n\nparam_shape : tuple\n\nname : string" - }, - { - "name": "_check_X", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the input data X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nn_components : int\n\nReturns\n-------\nX : array, shape (n_samples, n_features)" - } - ] - }, - { - "name": "sklearn.mixture._bayesian_mixture", - "imports": [ - "import math", - "import numpy as np", - "from scipy.special import betaln", - "from scipy.special import digamma", - "from scipy.special import gammaln", - "from _base import BaseMixture", - "from _base import _check_shape", - "from _gaussian_mixture import _check_precision_matrix", - "from _gaussian_mixture import _check_precision_positivity", - "from _gaussian_mixture import _compute_log_det_cholesky", - "from _gaussian_mixture import _compute_precision_cholesky", - "from _gaussian_mixture import 
_estimate_gaussian_parameters", - "from _gaussian_mixture import _estimate_log_gaussian_prob", - "from utils import check_array", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "BayesianGaussianMixture", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of mixture components. Depending on the data and the value of the `weight_concentration_prior` the model can decide to not use all the components by setting some component `weights_` to values very close to zero. The number of effective components is therefore smaller than n_components." - }, - { - "name": "covariance_type", - "type": "Literal['full', 'tied', 'diag', 'spherical']", - "hasDefault": true, - "default": "'full'", - "limitation": null, - "ignored": false, - "docstring": "String describing the type of covariance parameters to use. Must be one of:: 'full' (each component has its own general covariance matrix), 'tied' (all components share the same general covariance matrix), 'diag' (each component has its own diagonal covariance matrix), 'spherical' (each component has its own single variance)." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "The convergence threshold. EM iterations will stop when the lower bound average gain on the likelihood (of the training data with respect to the model) is below this threshold." - }, - { - "name": "reg_covar", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Non-negative regularization added to the diagonal of covariance. Allows to assure that the covariance matrices are all positive." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of EM iterations to perform." - }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of initializations to perform. The result with the highest lower bound value on the likelihood is kept." - }, - { - "name": "init_params", - "type": "Literal['kmeans', 'random']", - "hasDefault": true, - "default": "'kmeans'", - "limitation": null, - "ignored": false, - "docstring": "The method used to initialize the weights, the means and the covariances. Must be one of:: 'kmeans' : responsibilities are initialized using kmeans. 'random' : responsibilities are initialized randomly." - }, - { - "name": "weight_concentration_prior_type", - "type": "str", - "hasDefault": true, - "default": "'dirichlet_process'", - "limitation": null, - "ignored": false, - "docstring": "String describing the type of the weight concentration prior. Must be one of:: 'dirichlet_process' (using the Stick-breaking representation), 'dirichlet_distribution' (can favor more uniform weights)." - }, - { - "name": "weight_concentration_prior", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The dirichlet concentration of each component on the weight distribution (Dirichlet). This is commonly called gamma in the literature. The higher concentration puts more mass in the center and will lead to more components being active, while a lower concentration parameter will lead to more mass at the edge of the mixture weights simplex. The value of the parameter must be greater than 0. If it is None, it's set to ``1. / n_components``." 
- }, - { - "name": "mean_precision_prior", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The precision prior on the mean distribution (Gaussian). Controls the extent of where means can be placed. Larger values concentrate the cluster means around `mean_prior`. The value of the parameter must be greater than 0. If it is None, it is set to 1." - }, - { - "name": "mean_prior", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The prior on the mean distribution (Gaussian). If it is None, it is set to the mean of X." - }, - { - "name": "degrees_of_freedom_prior", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The prior of the number of degrees of freedom on the covariance distributions (Wishart). If it is None, it's set to `n_features`." - }, - { - "name": "covariance_prior", - "type": "Union[ArrayLike, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The prior on the covariance distribution (Wishart). If it is None, the emiprical covariance prior is initialized using the covariance of X. The shape depends on `covariance_type`:: (n_features, n_features) if 'full', (n_features, n_features) if 'tied', (n_features) if 'diag', float if 'spherical'" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the random seed given to the method chosen to initialize the parameters (see `init_params`). In addition, it controls the generation of random samples from the fitted distribution (see the method `sample`). Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If 'warm_start' is True, the solution of the last fitting is used as initialization for the next call of fit(). This can speed up convergence when fit is called several times on similar problems. See :term:`the Glossary `." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. If 1 then it prints the current initialization and each iteration step. If greater than 1 then it prints also the log probability and the time needed for each step." - }, - { - "name": "verbose_interval", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of iteration done before the next print." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_parameters", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that the parameters are well defined.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)" - }, - { - "name": "_check_weights_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the parameter of the Dirichlet distribution." 
- }, - { - "name": "_check_means_parameters", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the parameters of the Gaussian distribution.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)" - }, - { - "name": "_check_precision_parameters", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the prior parameters of the precision distribution.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)" - }, - { - "name": "_checkcovariance_prior_parameter", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the `covariance_prior_`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)" - }, - { - "name": "_initialize", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialization of the mixture parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nresp : array-like of shape (n_samples, n_components)" - }, - { - "name": "_estimate_weights", - "decorators": [], - "parameters": [ - { - 
"name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the parameters of the Dirichlet distribution.\n\nParameters\n----------\nnk : array-like of shape (n_components,)" - }, - { - "name": "_estimate_means", - "decorators": [], - "parameters": [ - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "xk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the parameters of the Gaussian distribution.\n\nParameters\n----------\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, n_features)" - }, - { - "name": "_estimate_precisions", - "decorators": [], - "parameters": [ - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "xk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "sk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The shape depends of `covariance_type`: 'full' : (n_components, n_features, n_features) 'tied' : (n_features, n_features) 'diag' : (n_components, n_features) 'spherical' : (n_components,)" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the precisions parameters of the precision distribution.\n\nParameters\n----------\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, n_features)\n\nsk : array-like\n The shape depends of 
`covariance_type`:\n 'full' : (n_components, n_features, n_features)\n 'tied' : (n_features, n_features)\n 'diag' : (n_components, n_features)\n 'spherical' : (n_components,)" - }, - { - "name": "_estimate_wishart_full", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "xk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "sk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the full Wishart distribution parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, n_features)\n\nsk : array-like of shape (n_components, n_features, n_features)" - }, - { - "name": "_estimate_wishart_tied", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "xk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "sk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the 
tied Wishart distribution parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, n_features)\n\nsk : array-like of shape (n_features, n_features)" - }, - { - "name": "_estimate_wishart_diag", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "xk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "sk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the diag Wishart distribution parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, n_features)\n\nsk : array-like of shape (n_components, n_features)" - }, - { - "name": "_estimate_wishart_spherical", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "xk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "sk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } 
- ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the spherical Wishart distribution parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, n_features)\n\nsk : array-like of shape (n_components,)" - }, - { - "name": "_m_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "log_resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Logarithm of the posterior probabilities (or responsibilities) of the point of each sample in X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "M step.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nlog_resp : array-like of shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X." - }, - { - "name": "_estimate_log_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_estimate_log_prob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_compute_lower_bound", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "log_resp", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Logarithm of the posterior probabilities (or responsibilities) of the point of each sample in X." 
- }, - { - "name": "log_prob_norm", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Logarithm of the probability of each sample in X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the lower bound of the model.\n\nThe lower bound on the likelihood (of the training data with respect to\nthe model) is used to detect the convergence and has to increase at\neach iteration.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nlog_resp : array, shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X.\n\nlog_prob_norm : float\n Logarithm of the probability of each sample in X.\n\nReturns\n-------\nlower_bound : float" - }, - { - "name": "_get_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Variational Bayesian estimation of a Gaussian mixture.\n\nThis class allows to infer an approximate posterior distribution over the\nparameters of a Gaussian mixture distribution. The effective number of\ncomponents can be inferred from the data.\n\nThis class implements two types of prior for the weights distribution: a\nfinite mixture model with Dirichlet distribution and an infinite mixture\nmodel with the Dirichlet Process. In practice Dirichlet Process inference\nalgorithm is approximated and uses a truncated distribution with a fixed\nmaximum number of components (called the Stick-breaking representation).\nThe number of components actually used almost always depends on the data.\n\n.. 
versionadded:: 0.18\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=1\n The number of mixture components. Depending on the data and the value\n of the `weight_concentration_prior` the model can decide to not use\n all the components by setting some component `weights_` to values very\n close to zero. The number of effective components is therefore smaller\n than n_components.\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'\n String describing the type of covariance parameters to use.\n Must be one of::\n\n 'full' (each component has its own general covariance matrix),\n 'tied' (all components share the same general covariance matrix),\n 'diag' (each component has its own diagonal covariance matrix),\n 'spherical' (each component has its own single variance).\n\ntol : float, default=1e-3\n The convergence threshold. EM iterations will stop when the\n lower bound average gain on the likelihood (of the training data with\n respect to the model) is below this threshold.\n\nreg_covar : float, default=1e-6\n Non-negative regularization added to the diagonal of covariance.\n Allows to assure that the covariance matrices are all positive.\n\nmax_iter : int, default=100\n The number of EM iterations to perform.\n\nn_init : int, default=1\n The number of initializations to perform. 
The result with the highest\n lower bound value on the likelihood is kept.\n\ninit_params : {'kmeans', 'random'}, default='kmeans'\n The method used to initialize the weights, the means and the\n covariances.\n Must be one of::\n\n 'kmeans' : responsibilities are initialized using kmeans.\n 'random' : responsibilities are initialized randomly.\n\nweight_concentration_prior_type : str, default='dirichlet_process'\n String describing the type of the weight concentration prior.\n Must be one of::\n\n 'dirichlet_process' (using the Stick-breaking representation),\n 'dirichlet_distribution' (can favor more uniform weights).\n\nweight_concentration_prior : float | None, default=None.\n The dirichlet concentration of each component on the weight\n distribution (Dirichlet). This is commonly called gamma in the\n literature. The higher concentration puts more mass in\n the center and will lead to more components being active, while a lower\n concentration parameter will lead to more mass at the edge of the\n mixture weights simplex. The value of the parameter must be greater\n than 0. If it is None, it's set to ``1. / n_components``.\n\nmean_precision_prior : float | None, default=None.\n The precision prior on the mean distribution (Gaussian).\n Controls the extent of where means can be placed. Larger\n values concentrate the cluster means around `mean_prior`.\n The value of the parameter must be greater than 0.\n If it is None, it is set to 1.\n\nmean_prior : array-like, shape (n_features,), default=None.\n The prior on the mean distribution (Gaussian).\n If it is None, it is set to the mean of X.\n\ndegrees_of_freedom_prior : float | None, default=None.\n The prior of the number of degrees of freedom on the covariance\n distributions (Wishart). 
If it is None, it's set to `n_features`.\n\ncovariance_prior : float or array-like, default=None.\n The prior on the covariance distribution (Wishart).\n If it is None, the emiprical covariance prior is initialized using the\n covariance of X. The shape depends on `covariance_type`::\n\n (n_features, n_features) if 'full',\n (n_features, n_features) if 'tied',\n (n_features) if 'diag',\n float if 'spherical'\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random seed given to the method chosen to initialize the\n parameters (see `init_params`).\n In addition, it controls the generation of random samples from the\n fitted distribution (see the method `sample`).\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nwarm_start : bool, default=False\n If 'warm_start' is True, the solution of the last fitting is used as\n initialization for the next call of fit(). This can speed up\n convergence when fit is called several times on similar problems.\n See :term:`the Glossary `.\n\nverbose : int, default=0\n Enable verbose output. If 1 then it prints the current\n initialization and each iteration step. If greater than 1 then\n it prints also the log probability and the time needed\n for each step.\n\nverbose_interval : int, default=10\n Number of iteration done before the next print.\n\nAttributes\n----------\nweights_ : array-like of shape (n_components,)\n The weights of each mixture components.\n\nmeans_ : array-like of shape (n_components, n_features)\n The mean of each mixture component.\n\ncovariances_ : array-like\n The covariance of each mixture component.\n The shape depends on `covariance_type`::\n\n (n_components,) if 'spherical',\n (n_features, n_features) if 'tied',\n (n_components, n_features) if 'diag',\n (n_components, n_features, n_features) if 'full'\n\nprecisions_ : array-like\n The precision matrices for each component in the mixture. 
A precision\n matrix is the inverse of a covariance matrix. A covariance matrix is\n symmetric positive definite so the mixture of Gaussian can be\n equivalently parameterized by the precision matrices. Storing the\n precision matrices instead of the covariance matrices makes it more\n efficient to compute the log-likelihood of new samples at test time.\n The shape depends on ``covariance_type``::\n\n (n_components,) if 'spherical',\n (n_features, n_features) if 'tied',\n (n_components, n_features) if 'diag',\n (n_components, n_features, n_features) if 'full'\n\nprecisions_cholesky_ : array-like\n The cholesky decomposition of the precision matrices of each mixture\n component. A precision matrix is the inverse of a covariance matrix.\n A covariance matrix is symmetric positive definite so the mixture of\n Gaussian can be equivalently parameterized by the precision matrices.\n Storing the precision matrices instead of the covariance matrices makes\n it more efficient to compute the log-likelihood of new samples at test\n time. The shape depends on ``covariance_type``::\n\n (n_components,) if 'spherical',\n (n_features, n_features) if 'tied',\n (n_components, n_features) if 'diag',\n (n_components, n_features, n_features) if 'full'\n\nconverged_ : bool\n True when convergence was reached in fit(), False otherwise.\n\nn_iter_ : int\n Number of step used by the best fit of inference to reach the\n convergence.\n\nlower_bound_ : float\n Lower bound value on the likelihood (of the training data with\n respect to the model) of the best fit of inference.\n\nweight_concentration_prior_ : tuple or float\n The dirichlet concentration of each component on the weight\n distribution (Dirichlet). 
The type depends on\n ``weight_concentration_prior_type``::\n\n (float, float) if 'dirichlet_process' (Beta parameters),\n float if 'dirichlet_distribution' (Dirichlet parameters).\n\n The higher concentration puts more mass in\n the center and will lead to more components being active, while a lower\n concentration parameter will lead to more mass at the edge of the\n simplex.\n\nweight_concentration_ : array-like of shape (n_components,)\n The dirichlet concentration of each component on the weight\n distribution (Dirichlet).\n\nmean_precision_prior_ : float\n The precision prior on the mean distribution (Gaussian).\n Controls the extent of where means can be placed.\n Larger values concentrate the cluster means around `mean_prior`.\n If mean_precision_prior is set to None, `mean_precision_prior_` is set\n to 1.\n\nmean_precision_ : array-like of shape (n_components,)\n The precision of each components on the mean distribution (Gaussian).\n\nmean_prior_ : array-like of shape (n_features,)\n The prior on the mean distribution (Gaussian).\n\ndegrees_of_freedom_prior_ : float\n The prior of the number of degrees of freedom on the covariance\n distributions (Wishart).\n\ndegrees_of_freedom_ : array-like of shape (n_components,)\n The number of degrees of freedom of each components in the model.\n\ncovariance_prior_ : float or array-like\n The prior on the covariance distribution (Wishart).\n The shape depends on `covariance_type`::\n\n (n_features, n_features) if 'full',\n (n_features, n_features) if 'tied',\n (n_features) if 'diag',\n float if 'spherical'\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.mixture import BayesianGaussianMixture\n>>> X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [12, 4], [10, 7]])\n>>> bgm = BayesianGaussianMixture(n_components=2, random_state=42).fit(X)\n>>> bgm.means_\narray([[2.49... , 2.29...],\n [8.45..., 4.52... 
]])\n>>> bgm.predict([[0, 0], [9, 3]])\narray([0, 1])\n\nSee Also\n--------\nGaussianMixture : Finite Gaussian mixture fit with EM.\n\nReferences\n----------\n\n.. [1] `Bishop, Christopher M. (2006). \"Pattern recognition and machine\n learning\". Vol. 4 No. 4. New York: Springer.\n `_\n\n.. [2] `Hagai Attias. (2000). \"A Variational Bayesian Framework for\n Graphical Models\". In Advances in Neural Information Processing\n Systems 12.\n `_\n\n.. [3] `Blei, David M. and Michael I. Jordan. (2006). \"Variational\n inference for Dirichlet process mixtures\". Bayesian analysis 1.1\n `_" - } - ], - "functions": [ - { - "name": "_log_dirichlet_norm", - "decorators": [], - "parameters": [ - { - "name": "dirichlet_concentration", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The parameters values of the Dirichlet distribution." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the log of the Dirichlet distribution normalization term.\n\nParameters\n----------\ndirichlet_concentration : array-like of shape (n_samples,)\n The parameters values of the Dirichlet distribution.\n\nReturns\n-------\nlog_dirichlet_norm : float\n The log normalization of the Dirichlet distribution." - }, - { - "name": "_log_wishart_norm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the log of the Wishart distribution normalization term.\n\nParameters\n----------\ndegrees_of_freedom : array-like of shape (n_components,)\n The number of degrees of freedom on the covariance Wishart\n distributions.\n\nlog_det_precision_chol : array-like of shape (n_components,)\n The determinant of the precision matrix for each component.\n\nn_features : int\n The number of features.\n\nReturn\n------\nlog_wishart_norm : array-like of shape (n_components,)\n The log normalization of the Wishart distribution." 
- } - ] - }, - { - "name": "sklearn.mixture._gaussian_mixture", - "imports": [ - "import numpy as np", - "from scipy import linalg", - "from _base import BaseMixture", - "from _base import _check_shape", - "from utils import check_array", - "from utils.extmath import row_norms", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "GaussianMixture", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of mixture components." - }, - { - "name": "covariance_type", - "type": "Literal['full', 'tied', 'diag', 'spherical']", - "hasDefault": true, - "default": "'full'", - "limitation": null, - "ignored": false, - "docstring": "String describing the type of covariance parameters to use. Must be one of: 'full' each component has its own general covariance matrix 'tied' all components share the same general covariance matrix 'diag' each component has its own diagonal covariance matrix 'spherical' each component has its own single variance" - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "The convergence threshold. EM iterations will stop when the lower bound average gain is below this threshold." - }, - { - "name": "reg_covar", - "type": "float", - "hasDefault": true, - "default": "1e-6", - "limitation": null, - "ignored": false, - "docstring": "Non-negative regularization added to the diagonal of covariance. Allows to assure that the covariance matrices are all positive." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "The number of EM iterations to perform." 
- }, - { - "name": "n_init", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of initializations to perform. The best results are kept." - }, - { - "name": "init_params", - "type": "Literal['kmeans', 'random']", - "hasDefault": true, - "default": "'kmeans'", - "limitation": null, - "ignored": false, - "docstring": "The method used to initialize the weights, the means and the precisions. Must be one of:: 'kmeans' : responsibilities are initialized using kmeans. 'random' : responsibilities are initialized randomly." - }, - { - "name": "weights_init", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The user-provided initial weights. If it is None, weights are initialized using the `init_params` method." - }, - { - "name": "means_init", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The user-provided initial means, If it is None, means are initialized using the `init_params` method." - }, - { - "name": "precisions_init", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The user-provided initial precisions (inverse of the covariance matrices). If it is None, precisions are initialized using the 'init_params' method. The shape depends on 'covariance_type':: (n_components,) if 'spherical', (n_features, n_features) if 'tied', (n_components, n_features) if 'diag', (n_components, n_features, n_features) if 'full'" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the random seed given to the method chosen to initialize the parameters (see `init_params`). 
In addition, it controls the generation of random samples from the fitted distribution (see the method `sample`). Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If 'warm_start' is True, the solution of the last fitting is used as initialization for the next call of fit(). This can speed up convergence when fit is called several times on similar problems. In that case, 'n_init' is ignored and only a single initialization occurs upon the first call. See :term:`the Glossary `." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. If 1 then it prints the current initialization and each iteration step. If greater than 1 then it prints also the log probability and the time needed for each step." - }, - { - "name": "verbose_interval", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of iteration done before the next print." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the Gaussian mixture parameters are well defined." 
- }, - { - "name": "_initialize", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialization of the Gaussian mixture parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nresp : array-like of shape (n_samples, n_components)" - }, - { - "name": "_m_step", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "log_resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Logarithm of the posterior probabilities (or responsibilities) of the point of each sample in X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "M step.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nlog_resp : array-like of shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X." 
- }, - { - "name": "_estimate_log_prob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_estimate_log_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_compute_lower_bound", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_n_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the number of free parameters in the model." - }, - { - "name": "bic", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Bayesian information criterion for the current model on the input X.\n\nParameters\n----------\nX : array of shape (n_samples, n_dimensions)\n\nReturns\n-------\nbic : float\n The lower the better." - }, - { - "name": "aic", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Akaike information criterion for the current model on the input X.\n\nParameters\n----------\nX : array of shape (n_samples, n_dimensions)\n\nReturns\n-------\naic : float\n The lower the better." 
- } - ], - "docstring": "Gaussian Mixture.\n\nRepresentation of a Gaussian mixture model probability distribution.\nThis class allows to estimate the parameters of a Gaussian mixture\ndistribution.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nn_components : int, default=1\n The number of mixture components.\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'\n String describing the type of covariance parameters to use.\n Must be one of:\n\n 'full'\n each component has its own general covariance matrix\n 'tied'\n all components share the same general covariance matrix\n 'diag'\n each component has its own diagonal covariance matrix\n 'spherical'\n each component has its own single variance\n\ntol : float, default=1e-3\n The convergence threshold. EM iterations will stop when the\n lower bound average gain is below this threshold.\n\nreg_covar : float, default=1e-6\n Non-negative regularization added to the diagonal of covariance.\n Allows to assure that the covariance matrices are all positive.\n\nmax_iter : int, default=100\n The number of EM iterations to perform.\n\nn_init : int, default=1\n The number of initializations to perform. 
The best results are kept.\n\ninit_params : {'kmeans', 'random'}, default='kmeans'\n The method used to initialize the weights, the means and the\n precisions.\n Must be one of::\n\n 'kmeans' : responsibilities are initialized using kmeans.\n 'random' : responsibilities are initialized randomly.\n\nweights_init : array-like of shape (n_components, ), default=None\n The user-provided initial weights.\n If it is None, weights are initialized using the `init_params` method.\n\nmeans_init : array-like of shape (n_components, n_features), default=None\n The user-provided initial means,\n If it is None, means are initialized using the `init_params` method.\n\nprecisions_init : array-like, default=None\n The user-provided initial precisions (inverse of the covariance\n matrices).\n If it is None, precisions are initialized using the 'init_params'\n method.\n The shape depends on 'covariance_type'::\n\n (n_components,) if 'spherical',\n (n_features, n_features) if 'tied',\n (n_components, n_features) if 'diag',\n (n_components, n_features, n_features) if 'full'\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random seed given to the method chosen to initialize the\n parameters (see `init_params`).\n In addition, it controls the generation of random samples from the\n fitted distribution (see the method `sample`).\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nwarm_start : bool, default=False\n If 'warm_start' is True, the solution of the last fitting is used as\n initialization for the next call of fit(). This can speed up\n convergence when fit is called several times on similar problems.\n In that case, 'n_init' is ignored and only a single initialization\n occurs upon the first call.\n See :term:`the Glossary `.\n\nverbose : int, default=0\n Enable verbose output. If 1 then it prints the current\n initialization and each iteration step. 
If greater than 1 then\n it prints also the log probability and the time needed\n for each step.\n\nverbose_interval : int, default=10\n Number of iteration done before the next print.\n\nAttributes\n----------\nweights_ : array-like of shape (n_components,)\n The weights of each mixture components.\n\nmeans_ : array-like of shape (n_components, n_features)\n The mean of each mixture component.\n\ncovariances_ : array-like\n The covariance of each mixture component.\n The shape depends on `covariance_type`::\n\n (n_components,) if 'spherical',\n (n_features, n_features) if 'tied',\n (n_components, n_features) if 'diag',\n (n_components, n_features, n_features) if 'full'\n\nprecisions_ : array-like\n The precision matrices for each component in the mixture. A precision\n matrix is the inverse of a covariance matrix. A covariance matrix is\n symmetric positive definite so the mixture of Gaussian can be\n equivalently parameterized by the precision matrices. Storing the\n precision matrices instead of the covariance matrices makes it more\n efficient to compute the log-likelihood of new samples at test time.\n The shape depends on `covariance_type`::\n\n (n_components,) if 'spherical',\n (n_features, n_features) if 'tied',\n (n_components, n_features) if 'diag',\n (n_components, n_features, n_features) if 'full'\n\nprecisions_cholesky_ : array-like\n The cholesky decomposition of the precision matrices of each mixture\n component. A precision matrix is the inverse of a covariance matrix.\n A covariance matrix is symmetric positive definite so the mixture of\n Gaussian can be equivalently parameterized by the precision matrices.\n Storing the precision matrices instead of the covariance matrices makes\n it more efficient to compute the log-likelihood of new samples at test\n time. 
The shape depends on `covariance_type`::\n\n (n_components,) if 'spherical',\n (n_features, n_features) if 'tied',\n (n_components, n_features) if 'diag',\n (n_components, n_features, n_features) if 'full'\n\nconverged_ : bool\n True when convergence was reached in fit(), False otherwise.\n\nn_iter_ : int\n Number of step used by the best fit of EM to reach the convergence.\n\nlower_bound_ : float\n Lower bound value on the log-likelihood (of the training data with\n respect to the model) of the best fit of EM.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.mixture import GaussianMixture\n>>> X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])\n>>> gm = GaussianMixture(n_components=2, random_state=0).fit(X)\n>>> gm.means_\narray([[10., 2.],\n [ 1., 2.]])\n>>> gm.predict([[0, 0], [12, 3]])\narray([1, 0])\n\nSee Also\n--------\nBayesianGaussianMixture : Gaussian mixture model fit with a variational\n inference." - } - ], - "functions": [ - { - "name": "_check_weights", - "decorators": [], - "parameters": [ - { - "name": "weights", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The proportions of components of each mixture." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the user provided 'weights'.\n\nParameters\n----------\nweights : array-like of shape (n_components,)\n The proportions of components of each mixture.\n\nn_components : int\n Number of components.\n\nReturns\n-------\nweights : array, shape (n_components,)" - }, - { - "name": "_check_means", - "decorators": [], - "parameters": [ - { - "name": "means", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The centers of the current components." - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of features." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Validate the provided 'means'.\n\nParameters\n----------\nmeans : array-like of shape (n_components, n_features)\n The centers of the current components.\n\nn_components : int\n Number of components.\n\nn_features : int\n Number of features.\n\nReturns\n-------\nmeans : array, (n_components, n_features)" - }, - { - "name": "_check_precision_positivity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check a precision vector is positive-definite." - }, - { - "name": "_check_precision_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check a precision matrix is symmetric and positive-definite." - }, - { - "name": "_check_precisions_full", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the precision matrices are symmetric and positive-definite." 
- }, - { - "name": "_check_precisions", - "decorators": [], - "parameters": [ - { - "name": "precisions", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "'full' : shape of (n_components, n_features, n_features) 'tied' : shape of (n_features, n_features) 'diag' : shape of (n_components, n_features) 'spherical' : shape of (n_components,)" - }, - { - "name": "covariance_type", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of components." - }, - { - "name": "n_features", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate user provided precisions.\n\nParameters\n----------\nprecisions : array-like\n 'full' : shape of (n_components, n_features, n_features)\n 'tied' : shape of (n_features, n_features)\n 'diag' : shape of (n_components, n_features)\n 'spherical' : shape of (n_components,)\n\ncovariance_type : string\n\nn_components : int\n Number of components.\n\nn_features : int\n Number of features.\n\nReturns\n-------\nprecisions : array" - }, - { - "name": "_estimate_gaussian_covariances_full", - "decorators": [], - "parameters": [ - { - "name": "resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - 
"name": "means", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "reg_covar", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the full covariance matrices.\n\nParameters\n----------\nresp : array-like of shape (n_samples, n_components)\n\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nmeans : array-like of shape (n_components, n_features)\n\nreg_covar : float\n\nReturns\n-------\ncovariances : array, shape (n_components, n_features, n_features)\n The covariance matrix of the current components." - }, - { - "name": "_estimate_gaussian_covariances_tied", - "decorators": [], - "parameters": [ - { - "name": "resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "means", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "reg_covar", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the tied covariance matrix.\n\nParameters\n----------\nresp : array-like of shape (n_samples, n_components)\n\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nmeans : array-like of shape (n_components, 
n_features)\n\nreg_covar : float\n\nReturns\n-------\ncovariance : array, shape (n_features, n_features)\n The tied covariance matrix of the components." - }, - { - "name": "_estimate_gaussian_covariances_diag", - "decorators": [], - "parameters": [ - { - "name": "responsibilities", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "means", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "reg_covar", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the diagonal covariance vectors.\n\nParameters\n----------\nresponsibilities : array-like of shape (n_samples, n_components)\n\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nmeans : array-like of shape (n_components, n_features)\n\nreg_covar : float\n\nReturns\n-------\ncovariances : array, shape (n_components, n_features)\n The covariance vector of the current components." 
- }, - { - "name": "_estimate_gaussian_covariances_spherical", - "decorators": [], - "parameters": [ - { - "name": "responsibilities", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "nk", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "means", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "reg_covar", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the spherical variance values.\n\nParameters\n----------\nresponsibilities : array-like of shape (n_samples, n_components)\n\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nmeans : array-like of shape (n_components, n_features)\n\nreg_covar : float\n\nReturns\n-------\nvariances : array, shape (n_components,)\n The variance values of each components." - }, - { - "name": "_estimate_gaussian_parameters", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data array." - }, - { - "name": "resp", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The responsibilities for each data sample in X." 
- }, - { - "name": "reg_covar", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The regularization added to the diagonal of the covariance matrices." - }, - { - "name": "covariance_type", - "type": "Literal['full', 'tied', 'diag', 'spherical']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The type of precision matrices." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the Gaussian distribution parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data array.\n\nresp : array-like of shape (n_samples, n_components)\n The responsibilities for each data sample in X.\n\nreg_covar : float\n The regularization added to the diagonal of the covariance matrices.\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}\n The type of precision matrices.\n\nReturns\n-------\nnk : array-like of shape (n_components,)\n The numbers of data samples in the current components.\n\nmeans : array-like of shape (n_components, n_features)\n The centers of the current components.\n\ncovariances : array-like\n The covariance matrix of the current components.\n The shape depends of the covariance_type." - }, - { - "name": "_compute_precision_cholesky", - "decorators": [], - "parameters": [ - { - "name": "covariances", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The covariance matrix of the current components. The shape depends of the covariance_type." - }, - { - "name": "covariance_type", - "type": "Literal['full', 'tied', 'diag', 'spherical']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The type of precision matrices." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the Cholesky decomposition of the precisions.\n\nParameters\n----------\ncovariances : array-like\n The covariance matrix of the current components.\n The shape depends of the covariance_type.\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}\n The type of precision matrices.\n\nReturns\n-------\nprecisions_cholesky : array-like\n The cholesky decomposition of sample precisions of the current\n components. The shape depends of the covariance_type." - }, - { - "name": "_compute_log_det_cholesky", - "decorators": [], - "parameters": [ - { - "name": "matrix_chol", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Cholesky decompositions of the matrices. 'full' : shape of (n_components, n_features, n_features) 'tied' : shape of (n_features, n_features) 'diag' : shape of (n_components, n_features) 'spherical' : shape of (n_components,)" - }, - { - "name": "covariance_type", - "type": "Literal['full', 'tied', 'diag', 'spherical']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "n_features", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of features." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the log-det of the cholesky decomposition of matrices.\n\nParameters\n----------\nmatrix_chol : array-like\n Cholesky decompositions of the matrices.\n 'full' : shape of (n_components, n_features, n_features)\n 'tied' : shape of (n_features, n_features)\n 'diag' : shape of (n_components, n_features)\n 'spherical' : shape of (n_components,)\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}\n\nn_features : int\n Number of features.\n\nReturns\n-------\nlog_det_precision_chol : array-like of shape (n_components,)\n The determinant of the precision matrix for each component." - }, - { - "name": "_estimate_log_gaussian_prob", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "means", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "precisions_chol", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Cholesky decompositions of the precision matrices. 
'full' : shape of (n_components, n_features, n_features) 'tied' : shape of (n_features, n_features) 'diag' : shape of (n_components, n_features) 'spherical' : shape of (n_components,)" - }, - { - "name": "covariance_type", - "type": "Literal['full', 'tied', 'diag', 'spherical']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate the log Gaussian probability.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nmeans : array-like of shape (n_components, n_features)\n\nprecisions_chol : array-like\n Cholesky decompositions of the precision matrices.\n 'full' : shape of (n_components, n_features, n_features)\n 'tied' : shape of (n_features, n_features)\n 'diag' : shape of (n_components, n_features)\n 'spherical' : shape of (n_components,)\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}\n\nReturns\n-------\nlog_prob : array, shape (n_samples, n_components)" - } - ] - }, - { - "name": "sklearn.mixture", - "imports": [ - "from _gaussian_mixture import GaussianMixture", - "from _bayesian_mixture import BayesianGaussianMixture" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.mixture.tests.test_bayesian_mixture", - "imports": [ - "import copy", - "import numpy as np", - "from scipy.special import gammaln", - "import pytest", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.metrics.cluster import adjusted_rand_score", - "from sklearn.mixture._bayesian_mixture import _log_dirichlet_norm", - "from sklearn.mixture._bayesian_mixture import _log_wishart_norm", - "from sklearn.mixture import BayesianGaussianMixture", - "from sklearn.mixture.tests.test_gaussian_mixture import RandomData", - "from sklearn.exceptions import ConvergenceWarning", - 
"from sklearn.exceptions import NotFittedError", - "from sklearn.utils._testing import ignore_warnings" - ], - "classes": [], - "functions": [ - { - "name": "test_log_dirichlet_norm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_log_wishart_norm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_covariance_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_weight_concentration_prior_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_weights_prior_initialisation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_mean_prior_initialisation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_precisions_prior_initialisation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_check_is_fitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_monotonic_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compare_covar_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "test_check_covariance_precision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invariant_translation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_fit_predict_n_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bayesian_mixture_predict_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.mixture.tests.test_gaussian_mixture", - "imports": [ - "import sys", - "import copy", - "import warnings", - "import pytest", - "import numpy as np", - "from scipy import stats", - "from scipy import linalg", - "from sklearn.covariance import EmpiricalCovariance", - "from sklearn.datasets import make_spd_matrix", - "from io import StringIO", - "from sklearn.metrics.cluster import adjusted_rand_score", - "from sklearn.mixture import GaussianMixture", - "from sklearn.mixture._gaussian_mixture import _estimate_gaussian_covariances_full", - "from sklearn.mixture._gaussian_mixture import _estimate_gaussian_covariances_tied", - "from sklearn.mixture._gaussian_mixture import _estimate_gaussian_covariances_diag", - "from sklearn.mixture._gaussian_mixture import _estimate_gaussian_covariances_spherical", - "from sklearn.mixture._gaussian_mixture import _compute_precision_cholesky", - "from sklearn.mixture._gaussian_mixture import _compute_log_det_cholesky", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.exceptions import NotFittedError", - "from sklearn.utils.extmath import fast_logdet", - "from 
sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.mixture._base import _check_X", - "from sklearn.mixture._gaussian_mixture import _estimate_log_gaussian_prob" - ], - "classes": [ - { - "name": "RandomData", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "generate_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_means", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_precisions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_suffstat_sk_full", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_suffstat_sk_tied", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_suffstat_sk_diag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_suffstat_sk_spherical", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_log_det_cholesky", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_naive_lmvnpdf_diag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_log_probabilities", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_estimate_log_prob_resp", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_predict_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_fit_predict_n_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_fit_best_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_fit_convergence_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_multiple_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_n_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bic_1d_1component", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_aic_bic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_mixture_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_convergence_detected_with_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_monotonic_likelihood", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regularisation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_property", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.mixture.tests.test_mixture", - "imports": [ - "import pytest", - "import numpy as np", - "from sklearn.mixture import GaussianMixture", - "from sklearn.mixture import BayesianGaussianMixture" - ], - "classes": [], - "functions": [ - { - "name": "test_gaussian_mixture_n_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.mixture.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.model_selection._search", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "from collections import defaultdict", - "from collections.abc import Mapping", - "from collections.abc import Sequence", - "from collections.abc import Iterable", - "from functools import partial", - "from functools import reduce", - "from itertools import product", - "import numbers", - "import operator", - "import time", - "import warnings", - "import numpy as np", - "from numpy.ma import MaskedArray", - "from scipy.stats import rankdata", - "from base import BaseEstimator", - "from base import is_classifier", - "from base import clone", - "from base import MetaEstimatorMixin", - "from _split import check_cv", - "from _validation import _fit_and_score", - "from _validation import _aggregate_score_dicts", - "from _validation import _insert_error_scores", - "from _validation import _normalize_score_results", - "from exceptions import NotFittedError", - "from joblib import Parallel", - "from utils import check_random_state", - "from utils.random import sample_without_replacement", - "from utils._tags import _safe_tags", - "from utils.validation import indexable", - "from utils.validation import check_is_fitted", - "from utils.validation 
import _check_fit_params", - "from utils.validation import _deprecate_positional_args", - "from utils.metaestimators import if_delegate_has_method", - "from utils.fixes import delayed", - "from metrics._scorer import _check_multimetric_scoring", - "from metrics import check_scoring", - "from utils import deprecated" - ], - "classes": [ - { - "name": "ParameterGrid", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "param_grid", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The parameter grid to explore, as a dictionary mapping estimator parameters to sequences of allowed values. An empty dict signifies default parameters. A sequence of dicts signifies a sequence of grids to search, and is useful to avoid exploring parameter combinations that make no sense or have no effect. See the examples below." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__iter__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Iterate over the points in the grid.\n\nReturns\n-------\nparams : iterator over dict of str to any\n Yields dictionaries mapping each estimator parameter to one of its\n allowed values." - }, - { - "name": "__len__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Number of points on the grid." 
- }, - { - "name": "__getitem__", - "decorators": [], - "parameters": [ - { - "name": "ind", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The iteration index" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get the parameters that would be ``ind``th in iteration\n\nParameters\n----------\nind : int\n The iteration index\n\nReturns\n-------\nparams : dict of str to any\n Equal to list(self)[ind]" - } - ], - "docstring": "Grid of parameters with a discrete number of values for each.\n\nCan be used to iterate over parameter value combinations with the\nPython built-in function iter.\nThe order of the generated parameter combinations is deterministic.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nparam_grid : dict of str to sequence, or sequence of such\n The parameter grid to explore, as a dictionary mapping estimator\n parameters to sequences of allowed values.\n\n An empty dict signifies default parameters.\n\n A sequence of dicts signifies a sequence of grids to search, and is\n useful to avoid exploring parameter combinations that make no sense\n or have no effect. See the examples below.\n\nExamples\n--------\n>>> from sklearn.model_selection import ParameterGrid\n>>> param_grid = {'a': [1, 2], 'b': [True, False]}\n>>> list(ParameterGrid(param_grid)) == (\n... [{'a': 1, 'b': True}, {'a': 1, 'b': False},\n... {'a': 2, 'b': True}, {'a': 2, 'b': False}])\nTrue\n\n>>> grid = [{'kernel': ['linear']}, {'kernel': ['rbf'], 'gamma': [1, 10]}]\n>>> list(ParameterGrid(grid)) == [{'kernel': 'linear'},\n... {'kernel': 'rbf', 'gamma': 1},\n... {'kernel': 'rbf', 'gamma': 10}]\nTrue\n>>> ParameterGrid(grid)[1] == {'kernel': 'rbf', 'gamma': 1}\nTrue\n\nSee Also\n--------\nGridSearchCV : Uses :class:`ParameterGrid` to perform a full parallelized\n parameter search." 
- }, - { - "name": "ParameterSampler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "param_distributions", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary with parameters names (`str`) as keys and distributions or lists of parameters to try. Distributions must provide a ``rvs`` method for sampling (such as those from scipy.stats.distributions). If a list is given, it is sampled uniformly. If a list of dicts is given, first a dict is sampled uniformly, and then a parameter is sampled using that dict as above." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of parameter settings that are produced." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo random number generator state used for random uniform sampling from lists of possible values instead of scipy.stats distributions. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_all_lists", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__iter__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__len__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Number of points that will be sampled." - } - ], - "docstring": "Generator on parameters sampled from given distributions.\n\nNon-deterministic iterable over random candidate combinations for hyper-\nparameter search. 
If all parameters are presented as a list,\nsampling without replacement is performed. If at least one parameter\nis given as a distribution, sampling with replacement is used.\nIt is highly recommended to use continuous distributions for continuous\nparameters.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nparam_distributions : dict\n Dictionary with parameters names (`str`) as keys and distributions\n or lists of parameters to try. Distributions must provide a ``rvs``\n method for sampling (such as those from scipy.stats.distributions).\n If a list is given, it is sampled uniformly.\n If a list of dicts is given, first a dict is sampled uniformly, and\n then a parameter is sampled using that dict as above.\n\nn_iter : int\n Number of parameter settings that are produced.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for random uniform sampling\n from lists of possible values instead of scipy.stats distributions.\n Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nparams : dict of str to any\n **Yields** dictionaries mapping each estimator parameter to\n as sampled value.\n\nExamples\n--------\n>>> from sklearn.model_selection import ParameterSampler\n>>> from scipy.stats.distributions import expon\n>>> import numpy as np\n>>> rng = np.random.RandomState(0)\n>>> param_grid = {'a':[1, 2], 'b': expon()}\n>>> param_list = list(ParameterSampler(param_grid, n_iter=4,\n... random_state=rng))\n>>> rounded_list = [dict((k, round(v, 6)) for (k, v) in d.items())\n... for d in param_list]\n>>> rounded_list == [{'b': 0.89856, 'a': 1},\n... {'b': 0.923223, 'a': 1},\n... {'b': 1.878964, 'a': 2},\n... 
{'b': 1.038159, 'a': 2}]\nTrue" - }, - { - "name": "BaseSearchCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_estimator_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target relative to X for classification or regression; None for unsupervised learning." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the score on the given data, if the estimator has been refit.\n\nThis uses the score defined by ``scoring`` where provided, and the\n``best_estimator_.score`` method otherwise.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples, n_output) or (n_samples,), default=None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\nReturns\n-------\nscore : float" - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to predict on. Must fulfill input requirements of the underlying estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call score_samples on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``score_samples``.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements\n of the underlying estimator.\n\nReturns\n-------\ny_score : ndarray of shape (n_samples,)" - }, - { - "name": "_check_is_fitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Must fulfill the input assumptions of the underlying estimator." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call predict on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``predict``.\n\nParameters\n----------\nX : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Must fulfill the input assumptions of the underlying estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call predict_proba on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``predict_proba``.\n\nParameters\n----------\nX : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Must fulfill the input assumptions of the underlying estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call predict_log_proba on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``predict_log_proba``.\n\nParameters\n----------\nX : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Must fulfill the input assumptions of the underlying estimator." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call decision_function on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``decision_function``.\n\nParameters\n----------\nX : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Must fulfill the input assumptions of the underlying estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call transform on the estimator with the best found parameters.\n\nOnly available if the underlying estimator supports ``transform`` and\n``refit=True``.\n\nParameters\n----------\nX : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "Xt", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Must fulfill the input assumptions of the underlying estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call inverse_transform on the estimator with the best found params.\n\nOnly available if the underlying estimator implements\n``inverse_transform`` and ``refit=True``.\n\nParameters\n----------\nXt : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator." 
- }, - { - "name": "n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_run_search", - "decorators": [], - "parameters": [ - { - "name": "evaluate_candidates", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This callback accepts: - a list of candidates, where each candidate is a dict of parameter settings. - an optional `cv` parameter which can be used to e.g. evaluate candidates on different dataset splits, or evaluate candidates on subsampled data (as done in the SucessiveHaling estimators). By default, the original `cv` parameter is used, and it is available as a private `_checked_cv_orig` attribute. - an optional `more_results` dict. Each key will be added to the `cv_results_` attribute. Values should be lists of length `n_candidates` It returns a dict of all results so far, formatted like ``cv_results_``. Important note (relevant whether the default cv is used or not): in randomized splitters, and unless the random_state parameter of cv was set to an int, calling cv.split() multiple times will yield different splits. Since cv.split() is called in evaluate_candidates, this means that candidates will be evaluated on different splits each time evaluate_candidates is called. This might be a methodological issue depending on the search strategy that you're implementing. 
To prevent randomized splitters from being used, you may use _split._yields_constant_splits()" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Repeatedly calls `evaluate_candidates` to conduct a search.\n\nThis method, implemented in sub-classes, makes it possible to\ncustomize the the scheduling of evaluations: GridSearchCV and\nRandomizedSearchCV schedule evaluations for their whole parameter\nsearch space at once but other more sequential approaches are also\npossible: for instance is possible to iteratively schedule evaluations\nfor new regions of the parameter search space based on previously\ncollected evaluation results. This makes it possible to implement\nBayesian optimization or more generally sequential model-based\noptimization by deriving from the BaseSearchCV abstract base class.\nFor example, Successive Halving is implemented by calling\n`evaluate_candidates` multiples times (once per iteration of the SH\nprocess), each time passing a different set of candidates with `X`\nand `y` of increasing sizes.\n\nParameters\n----------\nevaluate_candidates : callable\n This callback accepts:\n - a list of candidates, where each candidate is a dict of\n parameter settings.\n - an optional `cv` parameter which can be used to e.g.\n evaluate candidates on different dataset splits, or\n evaluate candidates on subsampled data (as done in the\n SucessiveHaling estimators). By default, the original `cv`\n parameter is used, and it is available as a private\n `_checked_cv_orig` attribute.\n - an optional `more_results` dict. Each key will be added to\n the `cv_results_` attribute. Values should be lists of\n length `n_candidates`\n\n It returns a dict of all results so far, formatted like\n ``cv_results_``.\n\n Important note (relevant whether the default cv is used or not):\n in randomized splitters, and unless the random_state parameter of\n cv was set to an int, calling cv.split() multiple times will\n yield different splits. 
Since cv.split() is called in\n evaluate_candidates, this means that candidates will be evaluated\n on different splits each time evaluate_candidates is called. This\n might be a methodological issue depending on the search strategy\n that you're implementing. To prevent randomized splitters from\n being used, you may use _split._yields_constant_splits()\n\nExamples\n--------\n\n::\n\n def _run_search(self, evaluate_candidates):\n 'Try C=0.1 only if C=1 is better than C=10'\n all_results = evaluate_candidates([{'C': 1}, {'C': 10}])\n score = all_results['mean_test_score']\n if score[0] < score[1]:\n evaluate_candidates([{'C': 0.1}])" - }, - { - "name": "_check_refit_for_multimetric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check `refit` is compatible with `scores` is valid" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target relative to X for classification or regression; None for unsupervised learning." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a \"Group\" :term:`cv` instance (e.g., :class:`~sklearn.model_selection.GroupKFold`)." 
- }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``fit`` method of the estimator" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Run fit with all sets of parameters.\n\nParameters\n----------\n\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples, n_output) or (n_samples,), default=None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n**fit_params : dict of str -> object\n Parameters passed to the ``fit`` method of the estimator" - }, - { - "name": "_format_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Abstract base class for hyper parameter search with cross-validation.\n " - }, - { - "name": "GridSearchCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed." 
- }, - { - "name": "param_grid", - "type": "Union[Dict, List]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary with parameters names (`str`) as keys and lists of parameter settings to try as values, or a list of such dictionaries, in which case the grids spanned by each dictionary in the list are explored. This enables searching over any sequence of parameter settings." - }, - { - "name": "scoring", - "type": "Union[Callable, str, Dict]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A single str (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. For evaluating multiple metrics, either give a list of (unique) strings or a dict with names as keys and callables as values. NOTE that when using custom scorers, each scorer should return a single value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. See :ref:`multimetric_grid_search` for an example. If None, the estimator's score method is used." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionchanged:: v0.20 `n_jobs` default changed from 1 to None" - }, - { - "name": "pre_dispatch", - "type": "int", - "hasDefault": true, - "default": "n_jobs", - "limitation": null, - "ignored": false, - "docstring": "Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. 
This parameter can be: - None, in which case all the jobs are immediately created and spawned. Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A str, giving an expression as a function of n_jobs, as in '2*n_jobs'" - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "refit", - "type": "Union[str, bool]", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Refit an estimator using the best found parameters on the whole dataset. For multiple metric evaluation, this needs to be a `str` denoting the scorer that would be used to find the best parameters for refitting the estimator at the end. Where there are considerations other than maximum score in choosing a best estimator, ``refit`` can be set to a function which returns the selected ``best_index_`` given ``cv_results_``. In that case, the ``best_estimator_`` and ``best_params_`` will be set according to the returned ``best_index_`` while the ``best_score_`` attribute will not be available. 
The refitted estimator is made available at the ``best_estimator_`` attribute and permits using ``predict`` directly on this ``GridSearchCV`` instance. Also for multiple metric evaluation, the attributes ``best_index_``, ``best_score_`` and ``best_params_`` will only be available if ``refit`` is set and all of them will be determined w.r.t this specific scorer. See ``scoring`` parameter to know more about multiple metric evaluation. .. versionchanged:: 0.20 Support for callable added." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity: the higher, the more messages. - >1 : the computation time for each fold and parameter candidate is displayed; - >2 : the score is also displayed; - >3 : the fold and candidate parameter indexes are also displayed together with the starting time of the computation." - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error." - }, - { - "name": "return_train_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, the ``cv_results_`` attribute will not include training scores. Computing training scores is used to get insights on how different parameter settings impact the overfitting/underfitting trade-off. However computing the scores on the training set can be computationally expensive and is not strictly required to select the parameters that yield the best generalization performance. .. versionadded:: 0.19 .. 
versionchanged:: 0.21 Default value was changed from ``True`` to ``False``" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_run_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Search all candidates in param_grid" - } - ], - "docstring": "Exhaustive search over specified parameter values for an estimator.\n\nImportant members are fit, predict.\n\nGridSearchCV implements a \"fit\" and a \"score\" method.\nIt also implements \"score_samples\", \"predict\", \"predict_proba\",\n\"decision_function\", \"transform\" and \"inverse_transform\" if they are\nimplemented in the estimator used.\n\nThe parameters of the estimator used to apply these methods are optimized\nby cross-validated grid-search over a parameter grid.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\nparam_grid : dict or list of dictionaries\n Dictionary with parameters names (`str`) as keys and lists of\n parameter settings to try as values, or a list of such\n dictionaries, in which case the grids spanned by each dictionary\n in the list are explored. This enables searching over any sequence\n of parameter settings.\n\nscoring : str, callable, list/tuple or dict, default=None\n A single str (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n For evaluating multiple metrics, either give a list of (unique) strings\n or a dict with names as keys and callables as values.\n\n NOTE that when using custom scorers, each scorer should return a single\n value. 
Metric functions returning a list/array of values can be wrapped\n into multiple scorers that return one value each.\n\n See :ref:`multimetric_grid_search` for an example.\n\n If None, the estimator's score method is used.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\npre_dispatch : int, or str, default=n_jobs\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. 
versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nrefit : bool, str, or callable, default=True\n Refit an estimator using the best found parameters on the whole\n dataset.\n\n For multiple metric evaluation, this needs to be a `str` denoting the\n scorer that would be used to find the best parameters for refitting\n the estimator at the end.\n\n Where there are considerations other than maximum score in\n choosing a best estimator, ``refit`` can be set to a function which\n returns the selected ``best_index_`` given ``cv_results_``. In that\n case, the ``best_estimator_`` and ``best_params_`` will be set\n according to the returned ``best_index_`` while the ``best_score_``\n attribute will not be available.\n\n The refitted estimator is made available at the ``best_estimator_``\n attribute and permits using ``predict`` directly on this\n ``GridSearchCV`` instance.\n\n Also for multiple metric evaluation, the attributes ``best_index_``,\n ``best_score_`` and ``best_params_`` will only be available if\n ``refit`` is set and all of them will be determined w.r.t this specific\n scorer.\n\n See ``scoring`` parameter to know more about multiple metric\n evaluation.\n\n .. versionchanged:: 0.20\n Support for callable added.\n\nverbose : int\n Controls the verbosity: the higher, the more messages.\n\n - >1 : the computation time for each fold and parameter candidate is\n displayed;\n - >2 : the score is also displayed;\n - >3 : the fold and candidate parameter indexes are also displayed\n together with the starting time of the computation.\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. 
This parameter does not affect the refit\n step, which will always raise the error.\n\nreturn_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\n .. versionadded:: 0.19\n\n .. versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``\n\n\nExamples\n--------\n>>> from sklearn import svm, datasets\n>>> from sklearn.model_selection import GridSearchCV\n>>> iris = datasets.load_iris()\n>>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}\n>>> svc = svm.SVC()\n>>> clf = GridSearchCV(svc, parameters)\n>>> clf.fit(iris.data, iris.target)\nGridSearchCV(estimator=SVC(),\n param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')})\n>>> sorted(clf.cv_results_.keys())\n['mean_fit_time', 'mean_score_time', 'mean_test_score',...\n 'param_C', 'param_kernel', 'params',...\n 'rank_test_score', 'split0_test_score',...\n 'split2_test_score', ...\n 'std_fit_time', 'std_score_time', 'std_test_score']\n\nAttributes\n----------\ncv_results_ : dict of numpy (masked) ndarrays\n A dict with keys as column headers and values as columns, that can be\n imported into a pandas ``DataFrame``.\n\n For instance the below given table\n\n +------------+-----------+------------+-----------------+---+---------+\n |param_kernel|param_gamma|param_degree|split0_test_score|...|rank_t...|\n +============+===========+============+=================+===+=========+\n | 'poly' | -- | 2 | 0.80 |...| 2 |\n +------------+-----------+------------+-----------------+---+---------+\n | 'poly' | -- | 3 | 0.70 |...| 4 |\n +------------+-----------+------------+-----------------+---+---------+\n | 'rbf' | 0.1 | 
-- | 0.80 |...| 3 |\n +------------+-----------+------------+-----------------+---+---------+\n | 'rbf' | 0.2 | -- | 0.93 |...| 1 |\n +------------+-----------+------------+-----------------+---+---------+\n\n will be represented by a ``cv_results_`` dict of::\n\n {\n 'param_kernel': masked_array(data = ['poly', 'poly', 'rbf', 'rbf'],\n mask = [False False False False]...)\n 'param_gamma': masked_array(data = [-- -- 0.1 0.2],\n mask = [ True True False False]...),\n 'param_degree': masked_array(data = [2.0 3.0 -- --],\n mask = [False False True True]...),\n 'split0_test_score' : [0.80, 0.70, 0.80, 0.93],\n 'split1_test_score' : [0.82, 0.50, 0.70, 0.78],\n 'mean_test_score' : [0.81, 0.60, 0.75, 0.85],\n 'std_test_score' : [0.01, 0.10, 0.05, 0.08],\n 'rank_test_score' : [2, 4, 3, 1],\n 'split0_train_score' : [0.80, 0.92, 0.70, 0.93],\n 'split1_train_score' : [0.82, 0.55, 0.70, 0.87],\n 'mean_train_score' : [0.81, 0.74, 0.70, 0.90],\n 'std_train_score' : [0.01, 0.19, 0.00, 0.03],\n 'mean_fit_time' : [0.73, 0.63, 0.43, 0.49],\n 'std_fit_time' : [0.01, 0.02, 0.01, 0.01],\n 'mean_score_time' : [0.01, 0.06, 0.04, 0.04],\n 'std_score_time' : [0.00, 0.00, 0.00, 0.01],\n 'params' : [{'kernel': 'poly', 'degree': 2}, ...],\n }\n\n NOTE\n\n The key ``'params'`` is used to store a list of parameter\n settings dicts for all the parameter candidates.\n\n The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and\n ``std_score_time`` are all in seconds.\n\n For multi-metric evaluation, the scores for all the scorers are\n available in the ``cv_results_`` dict at the keys ending with that\n scorer's name (``'_'``) instead of ``'_score'`` shown\n above. ('split0_test_precision', 'mean_train_precision' etc.)\n\nbest_estimator_ : estimator\n Estimator that was chosen by the search, i.e. estimator\n which gave highest score (or smallest loss if specified)\n on the left out data. 
Not available if ``refit=False``.\n\n See ``refit`` parameter for more information on allowed values.\n\nbest_score_ : float\n Mean cross-validated score of the best_estimator\n\n For multi-metric evaluation, this is present only if ``refit`` is\n specified.\n\n This attribute is not available if ``refit`` is a function.\n\nbest_params_ : dict\n Parameter setting that gave the best results on the hold out data.\n\n For multi-metric evaluation, this is present only if ``refit`` is\n specified.\n\nbest_index_ : int\n The index (of the ``cv_results_`` arrays) which corresponds to the best\n candidate parameter setting.\n\n The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n the parameter setting for the best model, that gives the highest\n mean score (``search.best_score_``).\n\n For multi-metric evaluation, this is present only if ``refit`` is\n specified.\n\nscorer_ : function or a dict\n Scorer function used on the held out data to choose the best\n parameters for the model.\n\n For multi-metric evaluation, this attribute holds the validated\n ``scoring`` dict which maps the scorer key to the scorer callable.\n\nn_splits_ : int\n The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n Seconds used for refitting the best model on the whole dataset.\n\n This is present only if ``refit`` is not False.\n\n .. versionadded:: 0.20\n\nmultimetric_ : bool\n Whether or not the scorers compute several metrics.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the left out\ndata, unless an explicit score is passed in which case it is used instead.\n\nIf `n_jobs` was set to a value higher than one, the data is copied for each\npoint in the grid (and not `n_jobs` times). This is done for efficiency\nreasons if individual jobs take very little time, but may raise errors if\nthe dataset is large and not enough memory is available. A workaround in\nthis case is to set `pre_dispatch`. 
Then, the memory is copied only\n`pre_dispatch` many times. A reasonable value for `pre_dispatch` is `2 *\nn_jobs`.\n\nSee Also\n---------\nParameterGrid : Generates all the combinations of a hyperparameter grid.\ntrain_test_split : Utility function to split the data into a development\n set usable for fitting a GridSearchCV instance and an evaluation set\n for its final evaluation.\nsklearn.metrics.make_scorer : Make a scorer from a performance metric or\n loss function." - }, - { - "name": "RandomizedSearchCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A object of that type is instantiated for each grid point. This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed." - }, - { - "name": "param_distributions", - "type": "Union[Dict, List]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary with parameters names (`str`) as keys and distributions or lists of parameters to try. Distributions must provide a ``rvs`` method for sampling (such as those from scipy.stats.distributions). If a list is given, it is sampled uniformly. If a list of dicts is given, first a dict is sampled uniformly, and then a parameter is sampled using that dict as above." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of parameter settings that are sampled. n_iter trades off runtime vs quality of the solution." 
- }, - { - "name": "scoring", - "type": "Union[Callable, str, Dict]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A single str (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. For evaluating multiple metrics, either give a list of (unique) strings or a dict with names as keys and callables as values. NOTE that when using custom scorers, each scorer should return a single value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. See :ref:`multimetric_grid_search` for an example. If None, the estimator's score method is used." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionchanged:: v0.20 `n_jobs` default changed from 1 to None" - }, - { - "name": "pre_dispatch", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. This parameter can be: - None, in which case all the jobs are immediately created and spawned. Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A str, giving an expression as a function of n_jobs, as in '2*n_jobs'" - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. 
Possible inputs for cv are: - None, to use the default 5-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "refit", - "type": "Union[str, bool]", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Refit an estimator using the best found parameters on the whole dataset. For multiple metric evaluation, this needs to be a `str` denoting the scorer that would be used to find the best parameters for refitting the estimator at the end. Where there are considerations other than maximum score in choosing a best estimator, ``refit`` can be set to a function which returns the selected ``best_index_`` given the ``cv_results``. In that case, the ``best_estimator_`` and ``best_params_`` will be set according to the returned ``best_index_`` while the ``best_score_`` attribute will not be available. The refitted estimator is made available at the ``best_estimator_`` attribute and permits using ``predict`` directly on this ``RandomizedSearchCV`` instance. Also for multiple metric evaluation, the attributes ``best_index_``, ``best_score_`` and ``best_params_`` will only be available if ``refit`` is set and all of them will be determined w.r.t this specific scorer. See ``scoring`` parameter to know more about multiple metric evaluation. .. versionchanged:: 0.20 Support for callable added." 
- }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity: the higher, the more messages." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo random number generator state used for random uniform sampling from lists of possible values instead of scipy.stats distributions. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error." - }, - { - "name": "return_train_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, the ``cv_results_`` attribute will not include training scores. Computing training scores is used to get insights on how different parameter settings impact the overfitting/underfitting trade-off. However computing the scores on the training set can be computationally expensive and is not strictly required to select the parameters that yield the best generalization performance. .. versionadded:: 0.19 .. 
versionchanged:: 0.21 Default value was changed from ``True`` to ``False``" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_run_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Search n_iter candidates from param_distributions" - } - ], - "docstring": "Randomized search on hyper parameters.\n\nRandomizedSearchCV implements a \"fit\" and a \"score\" method.\nIt also implements \"score_samples\", \"predict\", \"predict_proba\",\n\"decision_function\", \"transform\" and \"inverse_transform\" if they are\nimplemented in the estimator used.\n\nThe parameters of the estimator used to apply these methods are optimized\nby cross-validated search over parameter settings.\n\nIn contrast to GridSearchCV, not all parameter values are tried out, but\nrather a fixed number of parameter settings is sampled from the specified\ndistributions. The number of parameter settings that are tried is\ngiven by n_iter.\n\nIf all parameters are presented as a list,\nsampling without replacement is performed. If at least one parameter\nis given as a distribution, sampling with replacement is used.\nIt is highly recommended to use continuous distributions for continuous\nparameters.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.14\n\nParameters\n----------\nestimator : estimator object.\n A object of that type is instantiated for each grid point.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\nparam_distributions : dict or list of dicts\n Dictionary with parameters names (`str`) as keys and distributions\n or lists of parameters to try. 
Distributions must provide a ``rvs``\n method for sampling (such as those from scipy.stats.distributions).\n If a list is given, it is sampled uniformly.\n If a list of dicts is given, first a dict is sampled uniformly, and\n then a parameter is sampled using that dict as above.\n\nn_iter : int, default=10\n Number of parameter settings that are sampled. n_iter trades\n off runtime vs quality of the solution.\n\nscoring : str, callable, list/tuple or dict, default=None\n A single str (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n For evaluating multiple metrics, either give a list of (unique) strings\n or a dict with names as keys and callables as values.\n\n NOTE that when using custom scorers, each scorer should return a single\n value. Metric functions returning a list/array of values can be wrapped\n into multiple scorers that return one value each.\n\n See :ref:`multimetric_grid_search` for an example.\n\n If None, the estimator's score method is used.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\npre_dispatch : int, or str, default=None\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. 
Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nrefit : bool, str, or callable, default=True\n Refit an estimator using the best found parameters on the whole\n dataset.\n\n For multiple metric evaluation, this needs to be a `str` denoting the\n scorer that would be used to find the best parameters for refitting\n the estimator at the end.\n\n Where there are considerations other than maximum score in\n choosing a best estimator, ``refit`` can be set to a function which\n returns the selected ``best_index_`` given the ``cv_results``. 
In that\n case, the ``best_estimator_`` and ``best_params_`` will be set\n according to the returned ``best_index_`` while the ``best_score_``\n attribute will not be available.\n\n The refitted estimator is made available at the ``best_estimator_``\n attribute and permits using ``predict`` directly on this\n ``RandomizedSearchCV`` instance.\n\n Also for multiple metric evaluation, the attributes ``best_index_``,\n ``best_score_`` and ``best_params_`` will only be available if\n ``refit`` is set and all of them will be determined w.r.t this specific\n scorer.\n\n See ``scoring`` parameter to know more about multiple metric\n evaluation.\n\n .. versionchanged:: 0.20\n Support for callable added.\n\nverbose : int\n Controls the verbosity: the higher, the more messages.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for random uniform sampling\n from lists of possible values instead of scipy.stats distributions.\n Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error.\n\nreturn_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\n .. versionadded:: 0.19\n\n .. 
versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``\n\nAttributes\n----------\ncv_results_ : dict of numpy (masked) ndarrays\n A dict with keys as column headers and values as columns, that can be\n imported into a pandas ``DataFrame``.\n\n For instance the below given table\n\n +--------------+-------------+-------------------+---+---------------+\n | param_kernel | param_gamma | split0_test_score |...|rank_test_score|\n +==============+=============+===================+===+===============+\n | 'rbf' | 0.1 | 0.80 |...| 1 |\n +--------------+-------------+-------------------+---+---------------+\n | 'rbf' | 0.2 | 0.84 |...| 3 |\n +--------------+-------------+-------------------+---+---------------+\n | 'rbf' | 0.3 | 0.70 |...| 2 |\n +--------------+-------------+-------------------+---+---------------+\n\n will be represented by a ``cv_results_`` dict of::\n\n {\n 'param_kernel' : masked_array(data = ['rbf', 'rbf', 'rbf'],\n mask = False),\n 'param_gamma' : masked_array(data = [0.1 0.2 0.3], mask = False),\n 'split0_test_score' : [0.80, 0.84, 0.70],\n 'split1_test_score' : [0.82, 0.50, 0.70],\n 'mean_test_score' : [0.81, 0.67, 0.70],\n 'std_test_score' : [0.01, 0.24, 0.00],\n 'rank_test_score' : [1, 3, 2],\n 'split0_train_score' : [0.80, 0.92, 0.70],\n 'split1_train_score' : [0.82, 0.55, 0.70],\n 'mean_train_score' : [0.81, 0.74, 0.70],\n 'std_train_score' : [0.01, 0.19, 0.00],\n 'mean_fit_time' : [0.73, 0.63, 0.43],\n 'std_fit_time' : [0.01, 0.02, 0.01],\n 'mean_score_time' : [0.01, 0.06, 0.04],\n 'std_score_time' : [0.00, 0.00, 0.00],\n 'params' : [{'kernel' : 'rbf', 'gamma' : 0.1}, ...],\n }\n\n NOTE\n\n The key ``'params'`` is used to store a list of parameter\n settings dicts for all the parameter candidates.\n\n The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and\n ``std_score_time`` are all in seconds.\n\n For multi-metric evaluation, the scores for all the scorers are\n available in the ``cv_results_`` dict at the keys 
ending with that\n scorer's name (``'_'``) instead of ``'_score'`` shown\n above. ('split0_test_precision', 'mean_train_precision' etc.)\n\nbest_estimator_ : estimator\n Estimator that was chosen by the search, i.e. estimator\n which gave highest score (or smallest loss if specified)\n on the left out data. Not available if ``refit=False``.\n\n For multi-metric evaluation, this attribute is present only if\n ``refit`` is specified.\n\n See ``refit`` parameter for more information on allowed values.\n\nbest_score_ : float\n Mean cross-validated score of the best_estimator.\n\n For multi-metric evaluation, this is not available if ``refit`` is\n ``False``. See ``refit`` parameter for more information.\n\n This attribute is not available if ``refit`` is a function.\n\nbest_params_ : dict\n Parameter setting that gave the best results on the hold out data.\n\n For multi-metric evaluation, this is not available if ``refit`` is\n ``False``. See ``refit`` parameter for more information.\n\nbest_index_ : int\n The index (of the ``cv_results_`` arrays) which corresponds to the best\n candidate parameter setting.\n\n The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n the parameter setting for the best model, that gives the highest\n mean score (``search.best_score_``).\n\n For multi-metric evaluation, this is not available if ``refit`` is\n ``False``. See ``refit`` parameter for more information.\n\nscorer_ : function or a dict\n Scorer function used on the held out data to choose the best\n parameters for the model.\n\n For multi-metric evaluation, this attribute holds the validated\n ``scoring`` dict which maps the scorer key to the scorer callable.\n\nn_splits_ : int\n The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n Seconds used for refitting the best model on the whole dataset.\n\n This is present only if ``refit`` is not False.\n\n .. 
versionadded:: 0.20\n\nmultimetric_ : bool\n Whether or not the scorers compute several metrics.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the held-out\ndata, according to the scoring parameter.\n\nIf `n_jobs` was set to a value higher than one, the data is copied for each\nparameter setting(and not `n_jobs` times). This is done for efficiency\nreasons if individual jobs take very little time, but may raise errors if\nthe dataset is large and not enough memory is available. A workaround in\nthis case is to set `pre_dispatch`. Then, the memory is copied only\n`pre_dispatch` many times. A reasonable value for `pre_dispatch` is `2 *\nn_jobs`.\n\nSee Also\n--------\nGridSearchCV : Does exhaustive search over a grid of parameters.\nParameterSampler : A generator over parameter settings, constructed from\n param_distributions.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.model_selection import RandomizedSearchCV\n>>> from scipy.stats import uniform\n>>> iris = load_iris()\n>>> logistic = LogisticRegression(solver='saga', tol=1e-2, max_iter=200,\n... random_state=0)\n>>> distributions = dict(C=uniform(loc=0, scale=4),\n... penalty=['l2', 'l1'])\n>>> clf = RandomizedSearchCV(logistic, distributions, random_state=0)\n>>> search = clf.fit(iris.data, iris.target)\n>>> search.best_params_\n{'C': 2..., 'penalty': 'l1'}" - } - ], - "functions": [ - { - "name": "fit_grid_point", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Union[List, ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "y", - "type": "Optional[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets for input data." 
- }, - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A object of that type is instantiated for each grid point. This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed." - }, - { - "name": "parameters", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to be set on estimator for this grid point." - }, - { - "name": "train", - "type": "Union[NDArray, bool]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Boolean mask or indices for training set." - }, - { - "name": "test", - "type": "Union[NDArray, bool]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Boolean mask or indices for test set." - }, - { - "name": "scorer", - "type": "Optional[Callable]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The scorer callable object / function must have its signature as ``scorer(estimator, X, y)``. If ``None`` the estimator's score method is used." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Verbosity level." - }, - { - "name": "**fit_params", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional parameter passed to the fit function of the estimator." - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. 
If a numeric value is given, FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Run fit on one set of parameters.\n\nParameters\n----------\nX : array-like, sparse matrix or list\n Input data.\n\ny : array-like or None\n Targets for input data.\n\nestimator : estimator object\n A object of that type is instantiated for each grid point.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\nparameters : dict\n Parameters to be set on estimator for this grid point.\n\ntrain : ndarray, dtype int or bool\n Boolean mask or indices for training set.\n\ntest : ndarray, dtype int or bool\n Boolean mask or indices for test set.\n\nscorer : callable or None\n The scorer callable object / function must have its signature as\n ``scorer(estimator, X, y)``.\n\n If ``None`` the estimator's score method is used.\n\nverbose : int\n Verbosity level.\n\n**fit_params : kwargs\n Additional parameter passed to the fit function of the estimator.\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error.\n\nReturns\n-------\nscore : float\n Score of this parameter setting on given test split.\n\nparameters : dict\n The parameters that have been evaluated.\n\nn_samples_test : int\n Number of test samples in this split." 
- }, - { - "name": "_check_param_grid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.model_selection._search_successive_halving", - "imports": [ - "from math import ceil", - "from math import floor", - "from math import log", - "from abc import abstractmethod", - "from numbers import Integral", - "import numpy as np", - "from _search import _check_param_grid", - "from _search import BaseSearchCV", - "from None import ParameterGrid", - "from None import ParameterSampler", - "from utils.validation import _num_samples", - "from base import is_classifier", - "from _split import check_cv", - "from _split import _yields_constant_splits", - "from utils import resample" - ], - "classes": [ - { - "name": "_SubsampleMetaSplitter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Splitter that subsamples a given fraction of the dataset" - }, - { - "name": "BaseSuccessiveHalving", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_input_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." 
- }, - { - "name": "y", - "type": "Optional[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target relative to X for classification or regression; None for unsupervised learning." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a \"Group\" :term:`cv` instance (e.g., :class:`~sklearn.model_selection.GroupKFold`)." - }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``fit`` method of the estimator" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Run fit with all sets of parameters.\n\nParameters\n----------\n\nX : array-like, shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like, shape (n_samples,) or (n_samples, n_output), optional\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. 
Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of the estimator" - }, - { - "name": "_run_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_generate_candidate_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Implements successive halving.\n\nRef:\nAlmost optimal exploration in multi-armed bandits, ICML 13\nZohar Karnin, Tomer Koren, Oren Somekh" - }, - { - "name": "HalvingGridSearchCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed." - }, - { - "name": "param_grid", - "type": "Union[Dict, List]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary with parameters names (string) as keys and lists of parameter settings to try as values, or a list of such dictionaries, in which case the grids spanned by each dictionary in the list are explored. This enables searching over any sequence of parameter settings." - }, - { - "name": "factor", - "type": "Union[float, int]", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The 'halving' parameter, which determines the proportion of candidates that are selected for each subsequent iteration. For example, ``factor=3`` means that only one third of the candidates are selected." 
- }, - { - "name": "resource", - "type": "str", - "hasDefault": true, - "default": "'n_samples'", - "limitation": null, - "ignored": false, - "docstring": "Defines the resource that increases with each iteration. By default, the resource is the number of samples. It can also be set to any parameter of the base estimator that accepts positive integer values, e.g. 'n_iterations' or 'n_estimators' for a gradient boosting estimator. In this case ``max_resources`` cannot be 'auto' and must be set explicitly." - }, - { - "name": "max_resources", - "type": "int", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The maximum amount of resource that any candidate is allowed to use for a given iteration. By default, this is set to ``n_samples`` when ``resource='n_samples'`` (default), else an error is raised." - }, - { - "name": "min_resources", - "type": "Literal['exhaust', 'smallest']", - "hasDefault": true, - "default": "'exhaust'", - "limitation": null, - "ignored": false, - "docstring": "The minimum amount of resource that any candidate is allowed to use for a given iteration. Equivalently, this defines the amount of resources `r0` that are allocated for each candidate at the first iteration. - 'smallest' is a heuristic that sets `r0` to a small value: - ``n_splits * 2`` when ``resource='n_samples'`` for a regression problem - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a classification problem - ``1`` when ``resource != 'n_samples'`` - 'exhaust' will set `r0` such that the **last** iteration uses as much resources as possible. Namely, the last iteration will use the highest value smaller than ``max_resources`` that is a multiple of both ``min_resources`` and ``factor``. In general, using 'exhaust' leads to a more accurate estimator, but is slightly more time consuming. Note that the amount of resources used at each iteration is always a multiple of ``min_resources``." 
- }, - { - "name": "aggressive_elimination", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This is only relevant in cases where there isn't enough resources to reduce the remaining candidates to at most `factor` after the last iteration. If ``True``, then the search process will 'replay' the first iteration for as long as needed until the number of candidates is small enough. This is ``False`` by default, which means that the last iteration may evaluate more than ``factor`` candidates. See :ref:`aggressive_elimination` for more details." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - integer, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. note:: Due to implementation details, the folds produced by `cv` must be the same across multiple calls to `cv.split()`. For built-in `scikit-learn` iterators, this can be achieved by deactivating shuffling (`shuffle=False`), or by setting the `cv`'s `random_state` parameter to an integer." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. If None, the estimator's score method is used." 
- }, - { - "name": "refit", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, refit an estimator using the best found parameters on the whole dataset. The refitted estimator is made available at the ``best_estimator_`` attribute and permits using ``predict`` directly on this ``HalvingGridSearchCV`` instance." - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error. Default is ``np.nan``" - }, - { - "name": "return_train_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, the ``cv_results_`` attribute will not include training scores. Computing training scores is used to get insights on how different parameter settings impact the overfitting/underfitting trade-off. However computing the scores on the training set can be computationally expensive and is not strictly required to select the parameters that yield the best generalization performance." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo random number generator state used for subsampling the dataset when `resources != 'n_samples'`. Ignored otherwise. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. 
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity: the higher, the more messages." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_generate_candidate_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Search over specified parameter values with successive halving.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using\nmore and more resources.\n\nRead more in the :ref:`User guide `.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingGridSearchCV\n\nParameters\n----------\nestimator : estimator object.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\nparam_grid : dict or list of dictionaries\n Dictionary with parameters names (string) as keys and lists of\n parameter settings to try as values, or a list of such\n dictionaries, in which case the grids spanned by each dictionary\n in the list are explored. 
This enables searching over any sequence\n of parameter settings.\n\nfactor : int or float, default=3\n The 'halving' parameter, which determines the proportion of candidates\n that are selected for each subsequent iteration. For example,\n ``factor=3`` means that only one third of the candidates are selected.\n\nresource : ``'n_samples'`` or str, default='n_samples'\n Defines the resource that increases with each iteration. By default,\n the resource is the number of samples. It can also be set to any\n parameter of the base estimator that accepts positive integer\n values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n boosting estimator. In this case ``max_resources`` cannot be 'auto'\n and must be set explicitly.\n\nmax_resources : int, default='auto'\n The maximum amount of resource that any candidate is allowed to use\n for a given iteration. By default, this is set to ``n_samples`` when\n ``resource='n_samples'`` (default), else an error is raised.\n\nmin_resources : {'exhaust', 'smallest'} or int, default='exhaust'\n The minimum amount of resource that any candidate is allowed to use\n for a given iteration. Equivalently, this defines the amount of\n resources `r0` that are allocated for each candidate at the first\n iteration.\n\n - 'smallest' is a heuristic that sets `r0` to a small value:\n - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n problem\n - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n classification problem\n - ``1`` when ``resource != 'n_samples'``\n - 'exhaust' will set `r0` such that the **last** iteration uses as\n much resources as possible. Namely, the last iteration will use the\n highest value smaller than ``max_resources`` that is a multiple of\n both ``min_resources`` and ``factor``. 
In general, using 'exhaust'\n leads to a more accurate estimator, but is slightly more time\n consuming.\n\n Note that the amount of resources used at each iteration is always a\n multiple of ``min_resources``.\n\naggressive_elimination : bool, default=False\n This is only relevant in cases where there isn't enough resources to\n reduce the remaining candidates to at most `factor` after the last\n iteration. If ``True``, then the search process will 'replay' the\n first iteration for as long as needed until the number of candidates\n is small enough. This is ``False`` by default, which means that the\n last iteration may evaluate more than ``factor`` candidates. See\n :ref:`aggressive_elimination` for more details.\n\ncv : int, cross-validation generator or iterable, default=5\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. note::\n Due to implementation details, the folds produced by `cv` must be\n the same across multiple calls to `cv.split()`. 
For\n built-in `scikit-learn` iterators, this can be achieved by\n deactivating shuffling (`shuffle=False`), or by setting the\n `cv`'s `random_state` parameter to an integer.\n\nscoring : string, callable, or None, default=None\n A single string (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n If None, the estimator's score method is used.\n\nrefit : bool, default=True\n If True, refit an estimator using the best found parameters on the\n whole dataset.\n\n The refitted estimator is made available at the ``best_estimator_``\n attribute and permits using ``predict`` directly on this\n ``HalvingGridSearchCV`` instance.\n\nerror_score : 'raise' or numeric\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error. Default is ``np.nan``\n\nreturn_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for subsampling the dataset\n when `resources != 'n_samples'`. Ignored otherwise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_jobs : int or None, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nverbose : int\n Controls the verbosity: the higher, the more messages.\n\nAttributes\n----------\nn_resources_ : list of int\n The amount of resources used at each iteration.\n\nn_candidates_ : list of int\n The number of candidate parameters that were evaluated at each\n iteration.\n\nn_remaining_candidates_ : int\n The number of candidate parameters that are left after the last\n iteration. It corresponds to `ceil(n_candidates[-1] / factor)`\n\nmax_resources_ : int\n The maximum number of resources that any candidate is allowed to use\n for a given iteration. Note that since the number of resources used\n at each iteration must be a multiple of ``min_resources_``, the\n actual number of resources used at the last iteration may be smaller\n than ``max_resources_``.\n\nmin_resources_ : int\n The amount of resources that are allocated for each candidate at the\n first iteration.\n\nn_iterations_ : int\n The actual number of iterations that were run. This is equal to\n ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n Else, this is equal to ``min(n_possible_iterations_,\n n_required_iterations_)``.\n\nn_possible_iterations_ : int\n The number of iterations that are possible starting with\n ``min_resources_`` resources and without exceeding\n ``max_resources_``.\n\nn_required_iterations_ : int\n The number of iterations that are required to end up with less than\n ``factor`` candidates at the last iteration, starting with\n ``min_resources_`` resources. This will be smaller than\n ``n_possible_iterations_`` when there isn't enough resources.\n\ncv_results_ : dict of numpy (masked) ndarrays\n A dict with keys as column headers and values as columns, that can be\n imported into a pandas ``DataFrame``. 
It contains many informations for\n analysing the results of a search.\n Please refer to the :ref:`User guide`\n for details.\n\nbest_estimator_ : estimator or dict\n Estimator that was chosen by the search, i.e. estimator\n which gave highest score (or smallest loss if specified)\n on the left out data. Not available if ``refit=False``.\n\nbest_score_ : float\n Mean cross-validated score of the best_estimator.\n\nbest_params_ : dict\n Parameter setting that gave the best results on the hold out data.\n\nbest_index_ : int\n The index (of the ``cv_results_`` arrays) which corresponds to the best\n candidate parameter setting.\n\n The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n the parameter setting for the best model, that gives the highest\n mean score (``search.best_score_``).\n\nscorer_ : function or a dict\n Scorer function used on the held out data to choose the best\n parameters for the model.\n\nn_splits_ : int\n The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n Seconds used for refitting the best model on the whole dataset.\n\n This is present only if ``refit`` is not False.\n\nSee Also\n--------\n:class:`HalvingRandomSearchCV`:\n Random search over a set of parameters using successive halving.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the held-out\ndata, according to the scoring parameter.\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.experimental import enable_halving_search_cv # noqa\n>>> from sklearn.model_selection import HalvingGridSearchCV\n...\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = RandomForestClassifier(random_state=0)\n...\n>>> param_grid = {\"max_depth\": [3, None],\n... \"min_samples_split\": [5, 10]}\n>>> search = HalvingGridSearchCV(clf, param_grid, resource='n_estimators',\n... max_resources=10,\n... 
random_state=0).fit(X, y)\n>>> search.best_params_ # doctest: +SKIP\n{'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}" - }, - { - "name": "HalvingRandomSearchCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed." - }, - { - "name": "param_distributions", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary with parameters names (string) as keys and distributions or lists of parameters to try. Distributions must provide a ``rvs`` method for sampling (such as those from scipy.stats.distributions). If a list is given, it is sampled uniformly." - }, - { - "name": "n_candidates", - "type": "int", - "hasDefault": true, - "default": "'exhaust'", - "limitation": null, - "ignored": false, - "docstring": "The number of candidate parameters to sample, at the first iteration. Using 'exhaust' will sample enough candidates so that the last iteration uses as many resources as possible, based on `min_resources`, `max_resources` and `factor`. In this case, `min_resources` cannot be 'exhaust'." - }, - { - "name": "factor", - "type": "Union[float, int]", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "The 'halving' parameter, which determines the proportion of candidates that are selected for each subsequent iteration. For example, ``factor=3`` means that only one third of the candidates are selected." 
- }, - { - "name": "resource", - "type": "str", - "hasDefault": true, - "default": "'n_samples'", - "limitation": null, - "ignored": false, - "docstring": "Defines the resource that increases with each iteration. By default, the resource is the number of samples. It can also be set to any parameter of the base estimator that accepts positive integer values, e.g. 'n_iterations' or 'n_estimators' for a gradient boosting estimator. In this case ``max_resources`` cannot be 'auto' and must be set explicitly." - }, - { - "name": "max_resources", - "type": "int", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of resources that any candidate is allowed to use for a given iteration. By default, this is set ``n_samples`` when ``resource='n_samples'`` (default), else an error is raised." - }, - { - "name": "min_resources", - "type": "Literal['exhaust', 'smallest']", - "hasDefault": true, - "default": "'smallest'", - "limitation": null, - "ignored": false, - "docstring": "The minimum amount of resource that any candidate is allowed to use for a given iteration. Equivalently, this defines the amount of resources `r0` that are allocated for each candidate at the first iteration. - 'smallest' is a heuristic that sets `r0` to a small value: - ``n_splits * 2`` when ``resource='n_samples'`` for a regression problem - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a classification problem - ``1`` when ``resource != 'n_samples'`` - 'exhaust' will set `r0` such that the **last** iteration uses as much resources as possible. Namely, the last iteration will use the highest value smaller than ``max_resources`` that is a multiple of both ``min_resources`` and ``factor``. In general, using 'exhaust' leads to a more accurate estimator, but is slightly more time consuming. 'exhaust' isn't available when `n_candidates='exhaust'`. 
Note that the amount of resources used at each iteration is always a multiple of ``min_resources``." - }, - { - "name": "aggressive_elimination", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This is only relevant in cases where there isn't enough resources to reduce the remaining candidates to at most `factor` after the last iteration. If ``True``, then the search process will 'replay' the first iteration for as long as needed until the number of candidates is small enough. This is ``False`` by default, which means that the last iteration may evaluate more than ``factor`` candidates. See :ref:`aggressive_elimination` for more details." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - integer, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. note:: Due to implementation details, the folds produced by `cv` must be the same across multiple calls to `cv.split()`. For built-in `scikit-learn` iterators, this can be achieved by deactivating shuffling (`shuffle=False`), or by setting the `cv`'s `random_state` parameter to an integer." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. 
If None, the estimator's score method is used." - }, - { - "name": "refit", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, refit an estimator using the best found parameters on the whole dataset. The refitted estimator is made available at the ``best_estimator_`` attribute and permits using ``predict`` directly on this ``HalvingRandomSearchCV`` instance." - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error. Default is ``np.nan``" - }, - { - "name": "return_train_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If ``False``, the ``cv_results_`` attribute will not include training scores. Computing training scores is used to get insights on how different parameter settings impact the overfitting/underfitting trade-off. However computing the scores on the training set can be computationally expensive and is not strictly required to select the parameters that yield the best generalization performance." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Pseudo random number generator state used for subsampling the dataset when `resources != 'n_samples'`. Also used for random uniform sampling from lists of possible values instead of scipy.stats distributions. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- }, - { - "name": "n_jobs", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity: the higher, the more messages." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_generate_candidate_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Randomized search on hyper parameters.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using more\nand more resources.\n\nThe candidates are sampled at random from the parameter space and the\nnumber of sampled candidates is determined by ``n_candidates``.\n\nRead more in the :ref:`User guide`.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. 
To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingRandomSearchCV\n\nParameters\n----------\nestimator : estimator object.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\nparam_distributions : dict\n Dictionary with parameters names (string) as keys and distributions\n or lists of parameters to try. Distributions must provide a ``rvs``\n method for sampling (such as those from scipy.stats.distributions).\n If a list is given, it is sampled uniformly.\n\nn_candidates : int, default='exhaust'\n The number of candidate parameters to sample, at the first\n iteration. Using 'exhaust' will sample enough candidates so that the\n last iteration uses as many resources as possible, based on\n `min_resources`, `max_resources` and `factor`. In this case,\n `min_resources` cannot be 'exhaust'.\n\nfactor : int or float, default=3\n The 'halving' parameter, which determines the proportion of candidates\n that are selected for each subsequent iteration. For example,\n ``factor=3`` means that only one third of the candidates are selected.\n\nresource : ``'n_samples'`` or str, default='n_samples'\n Defines the resource that increases with each iteration. By default,\n the resource is the number of samples. It can also be set to any\n parameter of the base estimator that accepts positive integer\n values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n boosting estimator. In this case ``max_resources`` cannot be 'auto'\n and must be set explicitly.\n\nmax_resources : int, default='auto'\n The maximum number of resources that any candidate is allowed to use\n for a given iteration. 
By default, this is set ``n_samples`` when\n ``resource='n_samples'`` (default), else an error is raised.\n\nmin_resources : {'exhaust', 'smallest'} or int, default='smallest'\n The minimum amount of resource that any candidate is allowed to use\n for a given iteration. Equivalently, this defines the amount of\n resources `r0` that are allocated for each candidate at the first\n iteration.\n\n - 'smallest' is a heuristic that sets `r0` to a small value:\n - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n problem\n - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n classification problem\n - ``1`` when ``resource != 'n_samples'``\n - 'exhaust' will set `r0` such that the **last** iteration uses as\n much resources as possible. Namely, the last iteration will use the\n highest value smaller than ``max_resources`` that is a multiple of\n both ``min_resources`` and ``factor``. In general, using 'exhaust'\n leads to a more accurate estimator, but is slightly more time\n consuming. 'exhaust' isn't available when `n_candidates='exhaust'`.\n\n Note that the amount of resources used at each iteration is always a\n multiple of ``min_resources``.\n\naggressive_elimination : bool, default=False\n This is only relevant in cases where there isn't enough resources to\n reduce the remaining candidates to at most `factor` after the last\n iteration. If ``True``, then the search process will 'replay' the\n first iteration for as long as needed until the number of candidates\n is small enough. This is ``False`` by default, which means that the\n last iteration may evaluate more than ``factor`` candidates. 
See\n :ref:`aggressive_elimination` for more details.\n\ncv : int, cross-validation generator or an iterable, default=5\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. note::\n Due to implementation details, the folds produced by `cv` must be\n the same across multiple calls to `cv.split()`. For\n built-in `scikit-learn` iterators, this can be achieved by\n deactivating shuffling (`shuffle=False`), or by setting the\n `cv`'s `random_state` parameter to an integer.\n\nscoring : string, callable, or None, default=None\n A single string (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n If None, the estimator's score method is used.\n\nrefit : bool, default=True\n If True, refit an estimator using the best found parameters on the\n whole dataset.\n\n The refitted estimator is made available at the ``best_estimator_``\n attribute and permits using ``predict`` directly on this\n ``HalvingRandomSearchCV`` instance.\n\nerror_score : 'raise' or numeric\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error. 
Default is ``np.nan``\n\nreturn_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for subsampling the dataset\n when `resources != 'n_samples'`. Also used for random uniform\n sampling from lists of possible values instead of scipy.stats\n distributions.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_jobs : int or None, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int\n Controls the verbosity: the higher, the more messages.\n\nAttributes\n----------\nn_resources_ : list of int\n The amount of resources used at each iteration.\n\nn_candidates_ : list of int\n The number of candidate parameters that were evaluated at each\n iteration.\n\nn_remaining_candidates_ : int\n The number of candidate parameters that are left after the last\n iteration. It corresponds to `ceil(n_candidates[-1] / factor)`\n\nmax_resources_ : int\n The maximum number of resources that any candidate is allowed to use\n for a given iteration. 
Note that since the number of resources used at\n each iteration must be a multiple of ``min_resources_``, the actual\n number of resources used at the last iteration may be smaller than\n ``max_resources_``.\n\nmin_resources_ : int\n The amount of resources that are allocated for each candidate at the\n first iteration.\n\nn_iterations_ : int\n The actual number of iterations that were run. This is equal to\n ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n Else, this is equal to ``min(n_possible_iterations_,\n n_required_iterations_)``.\n\nn_possible_iterations_ : int\n The number of iterations that are possible starting with\n ``min_resources_`` resources and without exceeding\n ``max_resources_``.\n\nn_required_iterations_ : int\n The number of iterations that are required to end up with less than\n ``factor`` candidates at the last iteration, starting with\n ``min_resources_`` resources. This will be smaller than\n ``n_possible_iterations_`` when there isn't enough resources.\n\ncv_results_ : dict of numpy (masked) ndarrays\n A dict with keys as column headers and values as columns, that can be\n imported into a pandas ``DataFrame``. It contains many informations for\n analysing the results of a search.\n Please refer to the :ref:`User guide`\n for details.\n\nbest_estimator_ : estimator or dict\n Estimator that was chosen by the search, i.e. estimator\n which gave highest score (or smallest loss if specified)\n on the left out data. 
Not available if ``refit=False``.\n\nbest_score_ : float\n Mean cross-validated score of the best_estimator.\n\nbest_params_ : dict\n Parameter setting that gave the best results on the hold out data.\n\nbest_index_ : int\n The index (of the ``cv_results_`` arrays) which corresponds to the best\n candidate parameter setting.\n\n The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n the parameter setting for the best model, that gives the highest\n mean score (``search.best_score_``).\n\nscorer_ : function or a dict\n Scorer function used on the held out data to choose the best\n parameters for the model.\n\nn_splits_ : int\n The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n Seconds used for refitting the best model on the whole dataset.\n\n This is present only if ``refit`` is not False.\n\nSee Also\n--------\n:class:`HalvingGridSearchCV`:\n Search over a grid of parameters using successive halving.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the held-out\ndata, according to the scoring parameter.\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.experimental import enable_halving_search_cv # noqa\n>>> from sklearn.model_selection import HalvingRandomSearchCV\n>>> from scipy.stats import randint\n...\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = RandomForestClassifier(random_state=0)\n>>> np.random.seed(0)\n...\n>>> param_distributions = {\"max_depth\": [3, None],\n... \"min_samples_split\": randint(2, 11)}\n>>> search = HalvingRandomSearchCV(clf, param_distributions,\n... resource='n_estimators',\n... max_resources=10,\n... 
random_state=0).fit(X, y)\n>>> search.best_params_ # doctest: +SKIP\n{'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}" - } - ], - "functions": [ - { - "name": "_refit_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_top_k", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.model_selection._split", - "imports": [ - "from collections.abc import Iterable", - "import warnings", - "from itertools import chain", - "from itertools import combinations", - "from math import ceil", - "from math import floor", - "import numbers", - "from abc import ABCMeta", - "from abc import abstractmethod", - "from inspect import signature", - "import numpy as np", - "from scipy.special import comb", - "from utils import indexable", - "from utils import check_random_state", - "from utils import _safe_indexing", - "from utils import _approximate_mode", - "from utils.validation import _num_samples", - "from utils.validation import column_or_1d", - "from utils.validation import check_array", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import type_of_target", - "from base import _pprint" - ], - "classes": [ - { - "name": "BaseCrossValidator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - }, - { - "name": "_iter_test_masks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generates boolean masks corresponding to test sets.\n\nBy default, delegates to _iter_test_indices(X, y, groups)" - }, - { - "name": "_iter_test_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generates integer indices corresponding to test sets." 
- }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for all cross-validators\n\nImplementations must define `_iter_test_masks` or `_iter_test_indices`." - }, - { - "name": "LeaveOneOut", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_iter_test_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator." - } - ], - "docstring": "Leave-One-Out cross-validator\n\nProvides train/test indices to split data in train/test sets. Each\nsample is used once as a test set (singleton) while the remaining\nsamples form the training set.\n\nNote: ``LeaveOneOut()`` is equivalent to ``KFold(n_splits=n)`` and\n``LeavePOut(p=1)`` where ``n`` is the number of samples.\n\nDue to the high number of test sets (which is the same as the\nnumber of samples) this cross-validation method can be very costly.\nFor large datasets one should favor :class:`KFold`, :class:`ShuffleSplit`\nor :class:`StratifiedKFold`.\n\nRead more in the :ref:`User Guide `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeaveOneOut\n>>> X = np.array([[1, 2], [3, 4]])\n>>> y = np.array([1, 2])\n>>> loo = LeaveOneOut()\n>>> loo.get_n_splits(X)\n2\n>>> print(loo)\nLeaveOneOut()\n>>> for train_index, test_index in loo.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\n... print(X_train, X_test, y_train, y_test)\nTRAIN: [1] TEST: [0]\n[[3 4]] [[1 2]] [2] [1]\nTRAIN: [0] TEST: [1]\n[[1 2]] [[3 4]] [1] [2]\n\nSee Also\n--------\nLeaveOneGroupOut : For splitting the data according to explicit,\n domain-specific stratification of the dataset.\nGroupKFold : K-fold iterator variant with non-overlapping groups." 
- }, - { - "name": "LeavePOut", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "p", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Size of the test sets. Must be strictly less than the number of samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter_test_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility." - } - ], - "docstring": "Leave-P-Out cross-validator\n\nProvides train/test indices to split data in train/test sets. 
This results\nin testing on all distinct samples of size p, while the remaining n - p\nsamples form the training set in each iteration.\n\nNote: ``LeavePOut(p)`` is NOT equivalent to\n``KFold(n_splits=n_samples // p)`` which creates non-overlapping test sets.\n\nDue to the high number of iterations which grows combinatorically with the\nnumber of samples this cross-validation method can be very costly. For\nlarge datasets one should favor :class:`KFold`, :class:`StratifiedKFold`\nor :class:`ShuffleSplit`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\np : int\n Size of the test sets. Must be strictly less than the number of\n samples.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeavePOut\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n>>> y = np.array([1, 2, 3, 4])\n>>> lpo = LeavePOut(2)\n>>> lpo.get_n_splits(X)\n6\n>>> print(lpo)\nLeavePOut(p=2)\n>>> for train_index, test_index in lpo.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [2 3] TEST: [0 1]\nTRAIN: [1 3] TEST: [0 2]\nTRAIN: [1 2] TEST: [0 3]\nTRAIN: [0 3] TEST: [1 2]\nTRAIN: [0 2] TEST: [1 3]\nTRAIN: [0 1] TEST: [2 3]" - }, - { - "name": "_BaseKFold", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator." - } - ], - "docstring": "Base class for KFold, GroupKFold, and StratifiedKFold" - }, - { - "name": "KFold", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of folds. Must be at least 2. .. versionchanged:: 0.22 ``n_splits`` default value changed from 3 to 5." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle the data before splitting into batches. Note that the samples within each split will not be shuffled." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "When `shuffle` is True, `random_state` affects the ordering of the indices, which controls the randomness of each fold. Otherwise, this parameter has no effect. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter_test_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "K-Folds cross-validator\n\nProvides train/test indices to split data in train/test sets. 
Split\ndataset into k consecutive folds (without shuffling by default).\n\nEach fold is then used once as a validation while the k - 1 remaining\nfolds form the training set.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of folds. Must be at least 2.\n\n .. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5.\n\nshuffle : bool, default=False\n Whether to shuffle the data before splitting into batches.\n Note that the samples within each split will not be shuffled.\n\nrandom_state : int, RandomState instance or None, default=None\n When `shuffle` is True, `random_state` affects the ordering of the\n indices, which controls the randomness of each fold. Otherwise, this\n parameter has no effect.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import KFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([1, 2, 3, 4])\n>>> kf = KFold(n_splits=2)\n>>> kf.get_n_splits(X)\n2\n>>> print(kf)\nKFold(n_splits=2, random_state=None, shuffle=False)\n>>> for train_index, test_index in kf.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [2 3] TEST: [0 1]\nTRAIN: [0 1] TEST: [2 3]\n\nNotes\n-----\nThe first ``n_samples % n_splits`` folds have size\n``n_samples // n_splits + 1``, other folds have size\n``n_samples // n_splits``, where ``n_samples`` is the number of samples.\n\nRandomized CV splitters may return different results for each call of\nsplit. 
You can make the results identical by setting `random_state`\nto an integer.\n\nSee Also\n--------\nStratifiedKFold : Takes group information into account to avoid building\n folds with imbalanced class distributions (for binary or multiclass\n classification tasks).\n\nGroupKFold : K-fold iterator variant with non-overlapping groups.\n\nRepeatedKFold : Repeats K-Fold n times." - }, - { - "name": "GroupKFold", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of folds. Must be at least 2. .. versionchanged:: 0.22 ``n_splits`` default value changed from 3 to 5." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter_test_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - } - ], - "docstring": "K-fold iterator variant with non-overlapping groups.\n\nThe same group will not appear in two different folds (the number of\ndistinct groups has to be at least equal to the number of folds).\n\nThe folds are approximately balanced in the sense that the number of\ndistinct groups is approximately the same in each fold.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of folds. Must be at least 2.\n\n .. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import GroupKFold\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n>>> y = np.array([1, 2, 3, 4])\n>>> groups = np.array([0, 0, 2, 2])\n>>> group_kfold = GroupKFold(n_splits=2)\n>>> group_kfold.get_n_splits(X, y, groups)\n2\n>>> print(group_kfold)\nGroupKFold(n_splits=2)\n>>> for train_index, test_index in group_kfold.split(X, y, groups):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\n... 
print(X_train, X_test, y_train, y_test)\n...\nTRAIN: [0 1] TEST: [2 3]\n[[1 2]\n [3 4]] [[5 6]\n [7 8]] [1 2] [3 4]\nTRAIN: [2 3] TEST: [0 1]\n[[5 6]\n [7 8]] [[1 2]\n [3 4]] [3 4] [1 2]\n\nSee Also\n--------\nLeaveOneGroupOut : For splitting the data according to explicit\n domain-specific stratification of the dataset." - }, - { - "name": "StratifiedKFold", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of folds. Must be at least 2. .. versionchanged:: 0.22 ``n_splits`` default value changed from 3 to 5." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle each class's samples before splitting into batches. Note that the samples within each split will not be shuffled." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "When `shuffle` is True, `random_state` affects the ordering of the indices, which controls the randomness of each fold for each class. Otherwise, leave `random_state` as `None`. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_test_folds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter_test_masks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features. Note that providing ``y`` is sufficient to generate the splits and hence ``np.zeros(n_samples)`` may be used as a placeholder for ``X`` instead of actual training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems. Stratification is done based on the y labels." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Note that providing ``y`` is sufficient to generate the splits and\n hence ``np.zeros(n_samples)`` may be used as a placeholder for\n ``X`` instead of actual training data.\n\ny : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n Stratification is done based on the y labels.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer." - } - ], - "docstring": "Stratified K-Folds cross-validator.\n\nProvides train/test indices to split data in train/test sets.\n\nThis cross-validation object is a variation of KFold that returns\nstratified folds. The folds are made by preserving the percentage of\nsamples for each class.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of folds. Must be at least 2.\n\n .. 
versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5.\n\nshuffle : bool, default=False\n Whether to shuffle each class's samples before splitting into batches.\n Note that the samples within each split will not be shuffled.\n\nrandom_state : int, RandomState instance or None, default=None\n When `shuffle` is True, `random_state` affects the ordering of the\n indices, which controls the randomness of each fold for each class.\n Otherwise, leave `random_state` as `None`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import StratifiedKFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> skf = StratifiedKFold(n_splits=2)\n>>> skf.get_n_splits(X, y)\n2\n>>> print(skf)\nStratifiedKFold(n_splits=2, random_state=None, shuffle=False)\n>>> for train_index, test_index in skf.split(X, y):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [1 3] TEST: [0 2]\nTRAIN: [0 2] TEST: [1 3]\n\nNotes\n-----\nThe implementation is designed to:\n\n* Generate test sets such that all contain the same distribution of\n classes, or as close as possible.\n* Be invariant to class label: relabelling ``y = [\"Happy\", \"Sad\"]`` to\n ``y = [1, 0]`` should not change the indices generated.\n* Preserve order dependencies in the dataset ordering, when\n ``shuffle=False``: all samples from class k in some test set were\n contiguous in y, or separated in y by samples from classes other than k.\n* Generate test sets where the smallest and largest differ by at most one\n sample.\n\n.. versionchanged:: 0.22\n The previous implementation did not follow the last constraint.\n\nSee Also\n--------\nRepeatedStratifiedKFold : Repeats Stratified K-Fold n times." 
- }, - { - "name": "TimeSeriesSplit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of splits. Must be at least 2. .. versionchanged:: 0.22 ``n_splits`` default value changed from 3 to 5." - }, - { - "name": "max_train_size", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum size for a single training set." - }, - { - "name": "test_size", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to limit the size of the test set. Defaults to ``n_samples // (n_splits + 1)``, which is the maximum allowed value with ``gap=0``. .. versionadded:: 0.24" - }, - { - "name": "gap", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to exclude from the end of each train set before the test set. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Always ignored, exists for compatibility.\n\ngroups : array-like of shape (n_samples,)\n Always ignored, exists for compatibility.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - } - ], - "docstring": "Time Series cross-validator\n\nProvides train/test indices to split time series data samples\nthat are observed at fixed time intervals, in train/test sets.\nIn each split, test indices must be higher than before, and thus shuffling\nin cross validator is inappropriate.\n\nThis cross-validation object is a variation of :class:`KFold`.\nIn the kth split, it returns first k folds as train set and the\n(k+1)th fold as test set.\n\nNote that unlike standard cross-validation methods, successive\ntraining sets are supersets of those that come before them.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nn_splits : int, default=5\n Number of splits. Must be at least 2.\n\n .. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5.\n\nmax_train_size : int, default=None\n Maximum size for a single training set.\n\ntest_size : int, default=None\n Used to limit the size of the test set. Defaults to\n ``n_samples // (n_splits + 1)``, which is the maximum allowed value\n with ``gap=0``.\n\n .. versionadded:: 0.24\n\ngap : int, default=0\n Number of samples to exclude from the end of each train set before\n the test set.\n\n .. 
versionadded:: 0.24\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import TimeSeriesSplit\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> tscv = TimeSeriesSplit()\n>>> print(tscv)\nTimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None)\n>>> for train_index, test_index in tscv.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [0] TEST: [1]\nTRAIN: [0 1] TEST: [2]\nTRAIN: [0 1 2] TEST: [3]\nTRAIN: [0 1 2 3] TEST: [4]\nTRAIN: [0 1 2 3 4] TEST: [5]\n>>> # Fix test_size to 2 with 12 samples\n>>> X = np.random.randn(12, 2)\n>>> y = np.random.randint(0, 2, 12)\n>>> tscv = TimeSeriesSplit(n_splits=3, test_size=2)\n>>> for train_index, test_index in tscv.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [0 1 2 3 4 5] TEST: [6 7]\nTRAIN: [0 1 2 3 4 5 6 7] TEST: [8 9]\nTRAIN: [0 1 2 3 4 5 6 7 8 9] TEST: [10 11]\n>>> # Add in a 2 period gap\n>>> tscv = TimeSeriesSplit(n_splits=3, test_size=2, gap=2)\n>>> for train_index, test_index in tscv.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [0 1 2 3] TEST: [6 7]\nTRAIN: [0 1 2 3 4 5] TEST: [8 9]\nTRAIN: [0 1 2 3 4 5 6 7] TEST: [10 11]\n\nNotes\n-----\nThe training set has size ``i * n_samples // (n_splits + 1)\n+ n_samples % (n_splits + 1)`` in the ``i`` th split,\nwith a test set of size ``n_samples//(n_splits + 1)`` by default,\nwhere ``n_samples`` is the number of samples." 
- }, - { - "name": "LeaveOneGroupOut", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_iter_test_masks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. This 'groups' parameter must always be specified to calculate the number of splits, though the other parameters can be omitted." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set. This 'groups' parameter must always be specified to\n calculate the number of splits, though the other parameters can be\n omitted.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator." 
- }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - } - ], - "docstring": "Leave One Group Out cross-validator\n\nProvides train/test indices to split data according to a third-party\nprovided group. 
This group information can be used to encode arbitrary\ndomain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nRead more in the :ref:`User Guide `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeaveOneGroupOut\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n>>> y = np.array([1, 2, 1, 2])\n>>> groups = np.array([1, 1, 2, 2])\n>>> logo = LeaveOneGroupOut()\n>>> logo.get_n_splits(X, y, groups)\n2\n>>> logo.get_n_splits(groups=groups) # 'groups' is always required\n2\n>>> print(logo)\nLeaveOneGroupOut()\n>>> for train_index, test_index in logo.split(X, y, groups):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\n... print(X_train, X_test, y_train, y_test)\nTRAIN: [2 3] TEST: [0 1]\n[[5 6]\n [7 8]] [[1 2]\n [3 4]] [1 2] [1 2]\nTRAIN: [0 1] TEST: [2 3]\n[[1 2]\n [3 4]] [[5 6]\n [7 8]] [1 2] [1 2]" - }, - { - "name": "LeavePGroupsOut", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_groups", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of groups (``p``) to leave out in the test split." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter_test_masks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. This 'groups' parameter must always be specified to calculate the number of splits, though the other parameters can be omitted." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set. This 'groups' parameter must always be specified to\n calculate the number of splits, though the other parameters can be\n omitted.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator." - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - } - ], - "docstring": "Leave P Group(s) Out cross-validator\n\nProvides train/test indices to split data according to a third-party\nprovided group. This group information can be used to encode arbitrary\ndomain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nThe difference between LeavePGroupsOut and LeaveOneGroupOut is that\nthe former builds the test sets with all the samples assigned to\n``p`` different values of the groups while the latter uses samples\nall assigned the same groups.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_groups : int\n Number of groups (``p``) to leave out in the test split.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeavePGroupsOut\n>>> X = np.array([[1, 2], [3, 4], [5, 6]])\n>>> y = np.array([1, 2, 1])\n>>> groups = np.array([1, 2, 3])\n>>> lpgo = LeavePGroupsOut(n_groups=2)\n>>> lpgo.get_n_splits(X, y, groups)\n3\n>>> lpgo.get_n_splits(groups=groups) # 'groups' is always required\n3\n>>> print(lpgo)\nLeavePGroupsOut(n_groups=2)\n>>> for train_index, test_index in lpgo.split(X, y, groups):\n... 
print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\n... print(X_train, X_test, y_train, y_test)\nTRAIN: [2] TEST: [0 1]\n[[5 6]] [[1 2]\n [3 4]] [1] [1 2]\nTRAIN: [1] TEST: [0 2]\n[[3 4]] [[1 2]\n [5 6]] [2] [1 1]\nTRAIN: [0] TEST: [1 2]\n[[1 2]] [[3 4]\n [5 6]] [1] [2 1]\n\nSee Also\n--------\nGroupKFold : K-fold iterator variant with non-overlapping groups." - }, - { - "name": "_RepeatedSplits", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "cv", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Cross-validator class." - }, - { - "name": "n_repeats", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of times cross-validator needs to be repeated." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Passes `random_state` to the arbitrary repeating cross validator. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "**cvargs", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Constructor parameters for cv. Must not contain random_state and shuffle." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generates indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility. ``np.zeros(n_samples)`` may be used as a placeholder." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility. ``np.zeros(n_samples)`` may be used as a placeholder." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n ``np.zeros(n_samples)`` may be used as a placeholder.\n\ny : object\n Always ignored, exists for compatibility.\n ``np.zeros(n_samples)`` may be used as a placeholder.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator." - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Repeated splits for an arbitrary randomized CV splitter.\n\nRepeats splits for cross-validators n times with different randomization\nin each repetition.\n\nParameters\n----------\ncv : callable\n Cross-validator class.\n\nn_repeats : int, default=10\n Number of times cross-validator needs to be repeated.\n\nrandom_state : int, RandomState instance or None, default=None\n Passes `random_state` to the arbitrary repeating cross validator.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n**cvargs : additional params\n Constructor parameters for cv. Must not contain random_state\n and shuffle." - }, - { - "name": "RepeatedKFold", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of folds. Must be at least 2." - }, - { - "name": "n_repeats", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of times cross-validator needs to be repeated." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of each repeated cross-validation instance. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Repeated K-Fold cross validator.\n\nRepeats K-Fold n times with different randomization in each repetition.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of folds. Must be at least 2.\n\nn_repeats : int, default=10\n Number of times cross-validator needs to be repeated.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of each repeated cross-validation instance.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import RepeatedKFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=2652124)\n>>> for train_index, test_index in rkf.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\n...\nTRAIN: [0 1] TEST: [2 3]\nTRAIN: [2 3] TEST: [0 1]\nTRAIN: [1 2] TEST: [0 3]\nTRAIN: [0 3] TEST: [1 2]\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer.\n\nSee Also\n--------\nRepeatedStratifiedKFold : Repeats Stratified K-Fold n times." 
- }, - { - "name": "RepeatedStratifiedKFold", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of folds. Must be at least 2." - }, - { - "name": "n_repeats", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of times cross-validator needs to be repeated." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the generation of the random states for each repetition. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Repeated Stratified K-Fold cross validator.\n\nRepeats Stratified K-Fold n times with different randomization in each\nrepetition.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of folds. Must be at least 2.\n\nn_repeats : int, default=10\n Number of times cross-validator needs to be repeated.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the generation of the random states for each repetition.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import RepeatedStratifiedKFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> rskf = RepeatedStratifiedKFold(n_splits=2, n_repeats=2,\n... random_state=36851234)\n>>> for train_index, test_index in rskf.split(X, y):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... 
X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\n...\nTRAIN: [1 2] TEST: [0 3]\nTRAIN: [0 3] TEST: [1 2]\nTRAIN: [1 3] TEST: [0 2]\nTRAIN: [0 2] TEST: [1 3]\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer.\n\nSee Also\n--------\nRepeatedKFold : Repeats K-Fold n times." - }, - { - "name": "BaseShuffleSplit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer." - }, - { - "name": "_iter_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate (train, test) indices" - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator." - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for ShuffleSplit and StratifiedShuffleSplit" - }, - { - "name": "ShuffleSplit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of re-shuffling & splitting iterations." - }, - { - "name": "test_size", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split. If int, represents the absolute number of test samples. If None, the value is set to the complement of the train size. If ``train_size`` is also None, it will be set to 0.1." - }, - { - "name": "train_size", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the train split. If int, represents the absolute number of train samples. If None, the value is automatically set to the complement of the test size." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of the training and testing indices produced. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Random permutation cross-validator\n\nYields indices to split data into training and test sets.\n\nNote: contrary to other cross-validation strategies, random splits\ndo not guarantee that all folds will be different, although this is\nstill very likely for sizeable datasets.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=10\n Number of re-shuffling & splitting iterations.\n\ntest_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to include in the test split. If int, represents the\n absolute number of test samples. If None, the value is set to the\n complement of the train size. If ``train_size`` is also None, it will\n be set to 0.1.\n\ntrain_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the\n proportion of the dataset to include in the train split. If\n int, represents the absolute number of train samples. 
If None,\n the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the training and testing indices produced.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import ShuffleSplit\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [3, 4], [5, 6]])\n>>> y = np.array([1, 2, 1, 2, 1, 2])\n>>> rs = ShuffleSplit(n_splits=5, test_size=.25, random_state=0)\n>>> rs.get_n_splits(X)\n5\n>>> print(rs)\nShuffleSplit(n_splits=5, random_state=0, test_size=0.25, train_size=None)\n>>> for train_index, test_index in rs.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\nTRAIN: [1 3 0 4] TEST: [5 2]\nTRAIN: [4 0 2 5] TEST: [1 3]\nTRAIN: [1 2 4 0] TEST: [3 5]\nTRAIN: [3 4 1 0] TEST: [5 2]\nTRAIN: [3 5 1 0] TEST: [2 4]\n>>> rs = ShuffleSplit(n_splits=5, train_size=0.5, test_size=.25,\n... random_state=0)\n>>> for train_index, test_index in rs.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\nTRAIN: [1 3 0] TEST: [5 2]\nTRAIN: [4 0 2] TEST: [1 3]\nTRAIN: [1 2 4] TEST: [3 5]\nTRAIN: [3 4 1] TEST: [5 2]\nTRAIN: [3 5 1] TEST: [2 4]" - }, - { - "name": "GroupShuffleSplit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of re-shuffling & splitting iterations." - }, - { - "name": "test_size", - "type": "Union[int, float]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "If float, should be between 0.0 and 1.0 and represent the proportion of groups to include in the test split (rounded up). If int, represents the absolute number of test groups. 
If None, the value is set to the complement of the train size. The default will change in version 0.21. It will remain 0.2 only if ``train_size`` is unspecified, otherwise it will complement the specified ``train_size``." - }, - { - "name": "train_size", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If float, should be between 0.0 and 1.0 and represent the proportion of the groups to include in the train split. If int, represents the absolute number of train groups. If None, the value is automatically set to the complement of the test size." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of the training and testing indices produced. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer." - } - ], - "docstring": "Shuffle-Group(s)-Out cross-validation iterator\n\nProvides randomized train/test indices to split data according to a\nthird-party provided group. 
This group information can be used to encode\narbitrary domain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nThe difference between LeavePGroupsOut and GroupShuffleSplit is that\nthe former generates splits using all subsets of size ``p`` unique groups,\nwhereas GroupShuffleSplit generates a user-determined number of random\ntest splits, each with a user-determined fraction of unique groups.\n\nFor example, a less computationally intensive alternative to\n``LeavePGroupsOut(p=10)`` would be\n``GroupShuffleSplit(test_size=10, n_splits=100)``.\n\nNote: The parameters ``test_size`` and ``train_size`` refer to groups, and\nnot to samples, as in ShuffleSplit.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of re-shuffling & splitting iterations.\n\ntest_size : float, int, default=0.2\n If float, should be between 0.0 and 1.0 and represent the proportion\n of groups to include in the test split (rounded up). If int,\n represents the absolute number of test groups. If None, the value is\n set to the complement of the train size.\n The default will change in version 0.21. It will remain 0.2 only\n if ``train_size`` is unspecified, otherwise it will complement\n the specified ``train_size``.\n\ntrain_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the\n proportion of the groups to include in the train split. If\n int, represents the absolute number of train groups. 
If None,\n the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the training and testing indices produced.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import GroupShuffleSplit\n>>> X = np.ones(shape=(8, 2))\n>>> y = np.ones(shape=(8, 1))\n>>> groups = np.array([1, 1, 2, 2, 2, 3, 3, 3])\n>>> print(groups.shape)\n(8,)\n>>> gss = GroupShuffleSplit(n_splits=2, train_size=.7, random_state=42)\n>>> gss.get_n_splits()\n2\n>>> for train_idx, test_idx in gss.split(X, y, groups):\n... print(\"TRAIN:\", train_idx, \"TEST:\", test_idx)\nTRAIN: [2 3 4 5 6 7] TEST: [0 1]\nTRAIN: [0 1 5 6 7] TEST: [2 3 4]" - }, - { - "name": "StratifiedShuffleSplit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_splits", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of re-shuffling & splitting iterations." - }, - { - "name": "test_size", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split. If int, represents the absolute number of test samples. If None, the value is set to the complement of the train size. If ``train_size`` is also None, it will be set to 0.1." - }, - { - "name": "train_size", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the train split. If int, represents the absolute number of train samples. 
If None, the value is automatically set to the complement of the test size." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of the training and testing indices produced. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_iter_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features. Note that providing ``y`` is sufficient to generate the splits and hence ``np.zeros(n_samples)`` may be used as a placeholder for ``X`` instead of actual training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems. Stratification is done based on the y labels." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Note that providing ``y`` is sufficient to generate the splits and\n hence ``np.zeros(n_samples)`` may be used as a placeholder for\n ``X`` instead of actual training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_labels)\n The target variable for supervised learning problems.\n Stratification is done based on the y labels.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer." - } - ], - "docstring": "Stratified ShuffleSplit cross-validator\n\nProvides train/test indices to split data in train/test sets.\n\nThis cross-validation object is a merge of StratifiedKFold and\nShuffleSplit, which returns stratified randomized folds. The folds\nare made by preserving the percentage of samples for each class.\n\nNote: like the ShuffleSplit strategy, stratified random splits\ndo not guarantee that all folds will be different, although this is\nstill very likely for sizeable datasets.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=10\n Number of re-shuffling & splitting iterations.\n\ntest_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to include in the test split. If int, represents the\n absolute number of test samples. If None, the value is set to the\n complement of the train size. 
If ``train_size`` is also None, it will\n be set to 0.1.\n\ntrain_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the\n proportion of the dataset to include in the train split. If\n int, represents the absolute number of train samples. If None,\n the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the training and testing indices produced.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import StratifiedShuffleSplit\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 0, 1, 1, 1])\n>>> sss = StratifiedShuffleSplit(n_splits=5, test_size=0.5, random_state=0)\n>>> sss.get_n_splits(X, y)\n5\n>>> print(sss)\nStratifiedShuffleSplit(n_splits=5, random_state=0, ...)\n>>> for train_index, test_index in sss.split(X, y):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [5 2 3] TEST: [4 1 0]\nTRAIN: [5 1 4] TEST: [0 2 3]\nTRAIN: [5 0 2] TEST: [4 3 1]\nTRAIN: [4 1 0] TEST: [2 3 5]\nTRAIN: [0 5 1] TEST: [3 4 2]" - }, - { - "name": "PredefinedSplit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "test_fold", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The entry ``test_fold[i]`` represents the index of the test set that sample ``i`` belongs to. It is possible to exclude sample ``i`` from any test set (i.e. include sample ``i`` in every training set) by setting ``test_fold[i]`` equal to -1." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - }, - { - "name": "_iter_test_masks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generates boolean masks corresponding to test sets." - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator." - } - ], - "docstring": "Predefined split cross-validator\n\nProvides train/test indices to split data into train/test sets using a\npredefined scheme specified by the user with the ``test_fold`` parameter.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16\n\nParameters\n----------\ntest_fold : array-like of shape (n_samples,)\n The entry ``test_fold[i]`` represents the index of the test set that\n sample ``i`` belongs to. It is possible to exclude sample ``i`` from\n any test set (i.e. include sample ``i`` in every training set) by\n setting ``test_fold[i]`` equal to -1.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import PredefinedSplit\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> test_fold = [0, 1, -1, 1]\n>>> ps = PredefinedSplit(test_fold)\n>>> ps.get_n_splits()\n2\n>>> print(ps)\nPredefinedSplit(test_fold=array([ 0, 1, -1, 1]))\n>>> for train_index, test_index in ps.split():\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... 
y_train, y_test = y[train_index], y[test_index]\nTRAIN: [1 2 3] TEST: [0]\nTRAIN: [0 2] TEST: [1 3]" - }, - { - "name": "_CVIterableWrapper", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator." - }, - { - "name": "split", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." 
- }, - { - "name": "groups", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Always ignored, exists for compatibility." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split." - } - ], - "docstring": "Wrapper class for old style cv objects and iterables." - } - ], - "functions": [ - { - "name": "_validate_shuffle_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validation helper to check if the test/test sizes are meaningful wrt to the\nsize of the data (n_samples)" - }, - { - "name": "check_cv", - "decorators": [], - "parameters": [ - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if classifier is True and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value changed from 3-fold to 5-fold." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable for supervised learning problems." - }, - { - "name": "classifier", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether the task is a classification task, in which case stratified KFold will be used." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Input checker utility for building a cross-validator\n\nParameters\n----------\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n - None, to use the default 5-fold cross validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if classifier is True and ``y`` is either\n binary or multiclass, :class:`StratifiedKFold` is used. In all other\n cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value changed from 3-fold to 5-fold.\n\ny : array-like, default=None\n The target variable for supervised learning problems.\n\nclassifier : bool, default=False\n Whether the task is a classification task, in which case\n stratified KFold will be used.\n\nReturns\n-------\nchecked_cv : a cross-validator instance.\n The return value is a cross-validator which generates the train/test\n splits via the ``split`` method." - }, - { - "name": "train_test_split", - "decorators": [], - "parameters": [ - { - "name": "*arrays", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Allowed inputs are lists, numpy arrays, scipy-sparse matrices or pandas dataframes." 
- }, - { - "name": "test_size", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split. If int, represents the absolute number of test samples. If None, the value is set to the complement of the train size. If ``train_size`` is also None, it will be set to 0.25." - }, - { - "name": "train_size", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the train split. If int, represents the absolute number of train samples. If None, the value is automatically set to the complement of the test size." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the shuffling applied to the data before applying the split. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to shuffle the data before splitting. If shuffle=False then stratify must be None." - }, - { - "name": "stratify", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, data is split in a stratified fashion, using this as the class labels. Read more in the :ref:`User Guide `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Split arrays or matrices into random train and test subsets\n\nQuick utility that wraps input validation and\n``next(ShuffleSplit().split(X, y))`` and application to input data\ninto a single call for splitting (and optionally subsampling) data in a\noneliner.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n*arrays : sequence of indexables with same length / shape[0]\n Allowed inputs are lists, numpy arrays, scipy-sparse\n matrices or pandas dataframes.\n\ntest_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to include in the test split. If int, represents the\n absolute number of test samples. If None, the value is set to the\n complement of the train size. If ``train_size`` is also None, it will\n be set to 0.25.\n\ntrain_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the\n proportion of the dataset to include in the train split. If\n int, represents the absolute number of train samples. If None,\n the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the shuffling applied to the data before applying the split.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n\nshuffle : bool, default=True\n Whether or not to shuffle the data before splitting. If shuffle=False\n then stratify must be None.\n\nstratify : array-like, default=None\n If not None, data is split in a stratified fashion, using this as\n the class labels.\n Read more in the :ref:`User Guide `.\n\nReturns\n-------\nsplitting : list, length=2 * len(arrays)\n List containing train-test split of inputs.\n\n .. versionadded:: 0.16\n If the input is sparse, the output will be a\n ``scipy.sparse.csr_matrix``. 
Else, output type is the same as the\n input type.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = np.arange(10).reshape((5, 2)), range(5)\n>>> X\narray([[0, 1],\n [2, 3],\n [4, 5],\n [6, 7],\n [8, 9]])\n>>> list(y)\n[0, 1, 2, 3, 4]\n\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, test_size=0.33, random_state=42)\n...\n>>> X_train\narray([[4, 5],\n [0, 1],\n [6, 7]])\n>>> y_train\n[2, 0, 3]\n>>> X_test\narray([[2, 3],\n [8, 9]])\n>>> y_test\n[1, 4]\n\n>>> train_test_split(y, shuffle=False)\n[[0, 1, 2], [3, 4]]" - }, - { - "name": "_build_repr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_yields_constant_splits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.model_selection._validation", - "imports": [ - "import warnings", - "import numbers", - "import time", - "from traceback import format_exc", - "from contextlib import suppress", - "import numpy as np", - "import scipy.sparse as sp", - "from joblib import Parallel", - "from joblib import logger", - "from base import is_classifier", - "from base import clone", - "from utils import indexable", - "from utils import check_random_state", - "from utils import _safe_indexing", - "from utils.validation import _check_fit_params", - "from utils.validation import _num_samples", - "from utils.validation import _deprecate_positional_args", - "from utils.fixes import delayed", - "from utils.metaestimators import _safe_split", - "from metrics import check_scoring", - "from metrics._scorer import _check_multimetric_scoring", - "from metrics._scorer import _MultimetricScorer", - "from exceptions import FitFailedWarning", - "from exceptions import NotFittedError", - "from _split import check_cv", - "from preprocessing import LabelEncoder" - ], - "classes": [], 
- "functions": [ - { - "name": "cross_validate", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The object to use to fit the data." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to fit. Can be for example a list, or an array." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to try to predict in the case of supervised learning." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a \"Group\" :term:`cv` instance (e.g., :class:`GroupKFold`)." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A single str (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. For evaluating multiple metrics, either give a list of (unique) strings or a dict with names as keys and callables as values. NOTE that when using custom scorers, each scorer should return a single value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. See :ref:`multimetric_grid_search` for an example. If None, the estimator's score method is used." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. 
Possible inputs for cv are: - None, to use the default 5-fold cross validation, - int, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. Training the estimator and computing the score are parallelized over the cross-validation splits. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to pass to the fit method of the estimator." - }, - { - "name": "pre_dispatch", - "type": "Union[str, int]", - "hasDefault": true, - "default": "'2*n_jobs'", - "limitation": null, - "ignored": false, - "docstring": "Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. This parameter can be: - None, in which case all the jobs are immediately created and spawned. 
Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A str, giving an expression as a function of n_jobs, as in '2*n_jobs'" - }, - { - "name": "return_train_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to include train scores. Computing training scores is used to get insights on how different parameter settings impact the overfitting/underfitting trade-off. However computing the scores on the training set can be computationally expensive and is not strictly required to select the parameters that yield the best generalization performance. .. versionadded:: 0.19 .. versionchanged:: 0.21 Default value was changed from ``True`` to ``False``" - }, - { - "name": "return_estimator", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the estimators fitted on each split. .. versionadded:: 0.20" - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate metric(s) by cross-validation and also record fit/score times.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\nX : array-like of shape (n_samples, n_features)\n The data to fit. 
Can be for example a list, or an array.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n The target variable to try to predict in the case of\n supervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\nscoring : str, callable, list/tuple, or dict, default=None\n A single str (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n For evaluating multiple metrics, either give a list of (unique) strings\n or a dict with names as keys and callables as values.\n\n NOTE that when using custom scorers, each scorer should return a single\n value. Metric functions returning a list/array of values can be wrapped\n into multiple scorers that return one value each.\n\n See :ref:`multimetric_grid_search` for an example.\n\n If None, the estimator's score method is used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. 
Training the estimator and computing\n the score are parallelized over the cross-validation splits.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n The verbosity level.\n\nfit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\npre_dispatch : int or str, default='2*n_jobs'\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\nreturn_train_score : bool, default=False\n Whether to include train scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\n .. versionadded:: 0.19\n\n .. versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``\n\nreturn_estimator : bool, default=False\n Whether to return the estimators fitted on each split.\n\n .. versionadded:: 0.20\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\n .. 
versionadded:: 0.20\n\nReturns\n-------\nscores : dict of float arrays of shape (n_splits,)\n Array of scores of the estimator for each run of the cross validation.\n\n A dict of arrays containing the score/time arrays for each scorer is\n returned. The possible keys for this ``dict`` are:\n\n ``test_score``\n The score array for test scores on each cv split.\n Suffix ``_score`` in ``test_score`` changes to a specific\n metric like ``test_r2`` or ``test_auc`` if there are\n multiple scoring metrics in the scoring parameter.\n ``train_score``\n The score array for train scores on each cv split.\n Suffix ``_score`` in ``train_score`` changes to a specific\n metric like ``train_r2`` or ``train_auc`` if there are\n multiple scoring metrics in the scoring parameter.\n This is available only if ``return_train_score`` parameter\n is ``True``.\n ``fit_time``\n The time for fitting the estimator on the train\n set for each cv split.\n ``score_time``\n The time for scoring the estimator on the test set for each\n cv split. 
(Note time for scoring on the train set is not\n included even if ``return_train_score`` is set to ``True``\n ``estimator``\n The estimator objects for each cv split.\n This is available only if ``return_estimator`` parameter\n is set to ``True``.\n\nExamples\n--------\n>>> from sklearn import datasets, linear_model\n>>> from sklearn.model_selection import cross_validate\n>>> from sklearn.metrics import make_scorer\n>>> from sklearn.metrics import confusion_matrix\n>>> from sklearn.svm import LinearSVC\n>>> diabetes = datasets.load_diabetes()\n>>> X = diabetes.data[:150]\n>>> y = diabetes.target[:150]\n>>> lasso = linear_model.Lasso()\n\nSingle metric evaluation using ``cross_validate``\n\n>>> cv_results = cross_validate(lasso, X, y, cv=3)\n>>> sorted(cv_results.keys())\n['fit_time', 'score_time', 'test_score']\n>>> cv_results['test_score']\narray([0.33150734, 0.08022311, 0.03531764])\n\nMultiple metric evaluation using ``cross_validate``\n(please refer the ``scoring`` parameter doc for more information)\n\n>>> scores = cross_validate(lasso, X, y, cv=3,\n... scoring=('r2', 'neg_mean_squared_error'),\n... return_train_score=True)\n>>> print(scores['test_neg_mean_squared_error'])\n[-3635.5... -3573.3... -6114.7...]\n>>> print(scores['train_r2'])\n[0.28010158 0.39088426 0.22784852]\n\nSee Also\n---------\ncross_val_score : Run cross-validation for single metric evaluation.\n\ncross_val_predict : Get predictions from each split of cross-validation for\n diagnostic purposes.\n\nsklearn.metrics.make_scorer : Make a scorer from a performance metric or\n loss function." - }, - { - "name": "_insert_error_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Insert error in `results` by replacing them inplace with `error_score`.\n\nThis only applies to multimetric scores because `_fit_and_score` will\nhandle the single metric case." 
- }, - { - "name": "_normalize_score_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Creates a scoring dictionary based on the type of `scores`" - }, - { - "name": "cross_val_score", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The object to use to fit the data." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to fit. Can be for example a list, or an array." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to try to predict in the case of supervised learning." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a \"Group\" :term:`cv` instance (e.g., :class:`GroupKFold`)." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A str (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)`` which should return only a single value. Similar to :func:`cross_validate` but only a single metric is permitted. If None, the estimator's default scorer (if available) is used." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. 
Possible inputs for cv are: - None, to use the default 5-fold cross validation, - int, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. Training the estimator and computing the score are parallelized over the cross-validation splits. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to pass to the fit method of the estimator." - }, - { - "name": "pre_dispatch", - "type": "Union[str, int]", - "hasDefault": true, - "default": "'2*n_jobs'", - "limitation": null, - "ignored": false, - "docstring": "Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. This parameter can be: - None, in which case all the jobs are immediately created and spawned. 
Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A str, giving an expression as a function of n_jobs, as in '2*n_jobs'" - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate a score by cross-validation\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\nX : array-like of shape (n_samples, n_features)\n The data to fit. Can be for example a list, or an array.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n The target variable to try to predict in the case of\n supervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. 
Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\nscoring : str or callable, default=None\n A str (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)`` which should return only\n a single value.\n\n Similar to :func:`cross_validate`\n but only a single metric is permitted.\n\n If None, the estimator's default scorer (if available) is used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and computing\n the score are parallelized over the cross-validation splits.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n The verbosity level.\n\nfit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\npre_dispatch : int or str, default='2*n_jobs'\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. 
This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\n .. versionadded:: 0.20\n\nReturns\n-------\nscores : ndarray of float of shape=(len(list(cv)),)\n Array of scores of the estimator for each run of the cross validation.\n\nExamples\n--------\n>>> from sklearn import datasets, linear_model\n>>> from sklearn.model_selection import cross_val_score\n>>> diabetes = datasets.load_diabetes()\n>>> X = diabetes.data[:150]\n>>> y = diabetes.target[:150]\n>>> lasso = linear_model.Lasso()\n>>> print(cross_val_score(lasso, X, y, cv=3))\n[0.33150734 0.08022311 0.03531764]\n\nSee Also\n---------\ncross_validate : To run cross-validation on multiple metrics and also to\n return train scores, fit times and score times.\n\ncross_val_predict : Get predictions from each split of cross-validation for\n diagnostic purposes.\n\nsklearn.metrics.make_scorer : Make a scorer from a performance metric or\n loss function." - }, - { - "name": "_fit_and_score", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The object to use to fit the data." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to fit." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to try to predict in the case of supervised learning." - }, - { - "name": "scorer", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If it is a single callable, the return value for ``train_scores`` and ``test_scores`` is a single float. For a dict, it should be one mapping the scorer name to the scorer callable object / function. The callable object / fn should have signature ``scorer(estimator, X, y)``." - }, - { - "name": "train", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of training samples." - }, - { - "name": "test", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of test samples." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised." - }, - { - "name": "parameters", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to be set on the estimator." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters that will be passed to ``estimator.fit``." 
- }, - { - "name": "return_train_score", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Compute and return score on training set." - }, - { - "name": "return_parameters", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Return parameters that has been used for the estimator." - }, - { - "name": "split_progress", - "type": "Union[List, Tuple[]]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A list or tuple of format (, )." - }, - { - "name": "candidate_progress", - "type": "Union[List, Tuple[]]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A list or tuple of format (, )." - }, - { - "name": "return_n_test_samples", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the ``n_test_samples``." - }, - { - "name": "return_times", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the fit/score times." - }, - { - "name": "return_estimator", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the fitted estimator." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit estimator and compute scores for a given dataset split.\n\nParameters\n----------\nestimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\nX : array-like of shape (n_samples, n_features)\n The data to fit.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n The target variable to try to predict in the case of\n supervised learning.\n\nscorer : A single callable or dict mapping scorer name to the callable\n If it is a single callable, the return value for ``train_scores`` and\n ``test_scores`` is a single float.\n\n For a dict, it should be one mapping the scorer name to the scorer\n callable object / function.\n\n The callable object / fn should have signature\n ``scorer(estimator, X, y)``.\n\ntrain : array-like of shape (n_train_samples,)\n Indices of training samples.\n\ntest : array-like of shape (n_test_samples,)\n Indices of test samples.\n\nverbose : int\n The verbosity level.\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\nparameters : dict or None\n Parameters to be set on the estimator.\n\nfit_params : dict or None\n Parameters that will be passed to ``estimator.fit``.\n\nreturn_train_score : bool, default=False\n Compute and return score on training set.\n\nreturn_parameters : bool, default=False\n Return parameters that has been used for the estimator.\n\nsplit_progress : {list, tuple} of int, default=None\n A list or tuple of format (, ).\n\ncandidate_progress : {list, tuple} of int, default=None\n A list or tuple of format\n (, ).\n\nreturn_n_test_samples : bool, default=False\n Whether to return the ``n_test_samples``.\n\nreturn_times : bool, default=False\n Whether to return the fit/score times.\n\nreturn_estimator : bool, default=False\n 
Whether to return the fitted estimator.\n\nReturns\n-------\nresult : dict with the following attributes\n train_scores : dict of scorer name -> float\n Score on training set (for all the scorers),\n returned only if `return_train_score` is `True`.\n test_scores : dict of scorer name -> float\n Score on testing set (for all the scorers).\n n_test_samples : int\n Number of test samples.\n fit_time : float\n Time spent for fitting in seconds.\n score_time : float\n Time spent for scoring in seconds.\n parameters : dict or None\n The parameters that have been evaluated.\n estimator : estimator object\n The fitted estimator.\n fit_failed : bool\n The estimator failed to fit." - }, - { - "name": "_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the score(s) of an estimator on a given test set.\n\nWill return a dict of floats if `scorer` is a dict, otherwise a single\nfloat is returned." - }, - { - "name": "cross_val_predict", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The object to use to fit the data." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to fit. Can be, for example a list, or an array at least 2d." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to try to predict in the case of supervised learning." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a \"Group\" :term:`cv` instance (e.g., :class:`GroupKFold`)." 
- }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - int, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. Training the estimator and predicting are parallelized over the cross-validation splits. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to pass to the fit method of the estimator." - }, - { - "name": "pre_dispatch", - "type": "Union[str, int]", - "hasDefault": true, - "default": "'2*n_jobs'", - "limitation": null, - "ignored": false, - "docstring": "Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. 
This parameter can be: - None, in which case all the jobs are immediately created and spawned. Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A str, giving an expression as a function of n_jobs, as in '2*n_jobs'" - }, - { - "name": "method", - "type": "Literal['predict', 'predict_proba', 'predict_log_proba', 'decision_function']", - "hasDefault": true, - "default": "'predict'", - "limitation": null, - "ignored": false, - "docstring": "The method to be invoked by `estimator`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate cross-validated estimates for each input data point\n\nThe data is split according to the cv parameter. Each sample belongs\nto exactly one test set, and its prediction is computed with an\nestimator fitted on the corresponding training set.\n\nPassing these predictions into an evaluation metric may not be a valid\nway to measure generalization performance. Results can differ from\n:func:`cross_validate` and :func:`cross_val_score` unless all tests sets\nhave equal size and the metric decomposes over samples.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object implementing 'fit' and 'predict'\n The object to use to fit the data.\n\nX : array-like of shape (n_samples, n_features)\n The data to fit. Can be, for example a list, or an array at least 2d.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n The target variable to try to predict in the case of\n supervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. 
Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and\n predicting are parallelized over the cross-validation splits.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n The verbosity level.\n\nfit_params : dict, defualt=None\n Parameters to pass to the fit method of the estimator.\n\npre_dispatch : int or str, default='2*n_jobs'\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. 
Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\nmethod : {'predict', 'predict_proba', 'predict_log_proba', 'decision_function'}, default='predict'\n The method to be invoked by `estimator`.\n\nReturns\n-------\npredictions : ndarray\n This is the result of calling `method`. Shape:\n\n - When `method` is 'predict' and in special case where `method` is\n 'decision_function' and the target is binary: (n_samples,)\n - When `method` is one of {'predict_proba', 'predict_log_proba',\n 'decision_function'} (unless special case above):\n (n_samples, n_classes)\n - If `estimator` is :term:`multioutput`, an extra dimension\n 'n_outputs' is added to the end of each shape above.\n\nSee Also\n--------\ncross_val_score : Calculate score for each CV split.\ncross_validate : Calculate one or more scores and timings for each CV\n split.\n\nNotes\n-----\nIn the case that one or more classes are absent in a training portion, a\ndefault score needs to be assigned to all instances for that class if\n``method`` produces columns per class, as in {'decision_function',\n'predict_proba', 'predict_log_proba'}. For ``predict_proba`` this value is\n0. 
In order to ensure finite output, we approximate negative infinity by\nthe minimum finite float value for the dtype in other cases.\n\nExamples\n--------\n>>> from sklearn import datasets, linear_model\n>>> from sklearn.model_selection import cross_val_predict\n>>> diabetes = datasets.load_diabetes()\n>>> X = diabetes.data[:150]\n>>> y = diabetes.target[:150]\n>>> lasso = linear_model.Lasso()\n>>> y_pred = cross_val_predict(lasso, X, y, cv=3)" - }, - { - "name": "_fit_and_predict", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The object to use to fit the data." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to fit. .. versionchanged:: 0.20 X is only required to be an object with finite length or shape now" - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to try to predict in the case of supervised learning." - }, - { - "name": "train", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of training samples." - }, - { - "name": "test", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices of test samples." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters that will be passed to ``estimator.fit``." 
- }, - { - "name": "method", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Invokes the passed method name of the passed estimator." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit estimator and predict values for a given dataset split.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object implementing 'fit' and 'predict'\n The object to use to fit the data.\n\nX : array-like of shape (n_samples, n_features)\n The data to fit.\n\n .. versionchanged:: 0.20\n X is only required to be an object with finite length or shape now\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n The target variable to try to predict in the case of\n supervised learning.\n\ntrain : array-like of shape (n_train_samples,)\n Indices of training samples.\n\ntest : array-like of shape (n_test_samples,)\n Indices of test samples.\n\nverbose : int\n The verbosity level.\n\nfit_params : dict or None\n Parameters that will be passed to ``estimator.fit``.\n\nmethod : str\n Invokes the passed method name of the passed estimator.\n\nReturns\n-------\npredictions : sequence\n Result of calling 'estimator.method'" - }, - { - "name": "_enforce_prediction_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ensure that prediction arrays have correct column order\n\nWhen doing cross-validation, if one or more classes are\nnot present in the subset of data used for training,\nthen the output prediction array might not have the same\ncolumns as other folds. Use the list of class names\n(assumed to be ints) to enforce the correct column order.\n\nNote that `classes` is the list of classes in this fold\n(a subset of the classes in the full training set)\nand `n_classes` is the number of classes in the full training set." 
- }, - { - "name": "_check_is_permutation", - "decorators": [], - "parameters": [ - { - "name": "indices", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "int array to test" - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "number of expected elements" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check whether indices is a reordering of the array np.arange(n_samples)\n\nParameters\n----------\nindices : ndarray\n int array to test\nn_samples : int\n number of expected elements\n\nReturns\n-------\nis_partition : bool\n True iff sorted(indices) is np.arange(n)" - }, - { - "name": "permutation_test_score", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The object to use to fit the data." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to fit." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target variable to try to predict in the case of supervised learning." - }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Labels to constrain permutation within groups, i.e. ``y`` values are permuted among samples with the same group identifier. When not specified, ``y`` values are permuted among all samples. When a grouped cross-validator is used, the group labels are also passed on to the ``split`` method of the cross-validator. The cross-validator uses them for grouping the samples while splitting the dataset into train/test set." 
- }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A single str (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. If None the estimator's score method is used." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - int, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "n_permutations", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of times to permute ``y``." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. Training the estimator and computing the cross-validated score are parallelized over the permutations. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Pass an int for reproducible output for permutation of ``y`` values among samples. 
See :term:`Glossary `." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to pass to the fit method of the estimator. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate the significance of a cross-validated score with permutations\n\nPermutes targets to generate 'randomized data' and compute the empirical\np-value against the null hypothesis that features and targets are\nindependent.\n\nThe p-value represents the fraction of randomized data sets where the\nestimator performed as well or better than in the original data. A small\np-value suggests that there is a real dependency between features and\ntargets which has been used by the estimator to give good predictions.\nA large p-value may be due to lack of real dependency between features\nand targets or the estimator was not able to use the dependency to\ngive good predictions.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\nX : array-like of shape at least 2D\n The data to fit.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n The target variable to try to predict in the case of\n supervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Labels to constrain permutation within groups, i.e. ``y`` values\n are permuted among samples with the same group identifier.\n When not specified, ``y`` values are permuted among all samples.\n\n When a grouped cross-validator is used, the group labels are\n also passed on to the ``split`` method of the cross-validator. 
The\n cross-validator uses them for grouping the samples while splitting\n the dataset into train/test set.\n\nscoring : str or callable, default=None\n A single str (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n If None the estimator's score method is used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_permutations : int, default=100\n Number of times to permute ``y``.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and computing\n the cross-validated score are parallelized over the permutations.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance or None, default=0\n Pass an int for reproducible output for permutation of\n ``y`` values among samples. See :term:`Glossary `.\n\nverbose : int, default=0\n The verbosity level.\n\nfit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\n .. 
versionadded:: 0.24\n\nReturns\n-------\nscore : float\n The true score without permuting targets.\n\npermutation_scores : array of shape (n_permutations,)\n The scores obtained for each permutations.\n\npvalue : float\n The p-value, which approximates the probability that the score would\n be obtained by chance. This is calculated as:\n\n `(C + 1) / (n_permutations + 1)`\n\n Where C is the number of permutations whose score >= the true score.\n\n The best possible p-value is 1/(n_permutations + 1), the worst is 1.0.\n\nNotes\n-----\nThis function implements Test 1 in:\n\n Ojala and Garriga. `Permutation Tests for Studying Classifier\n Performance\n `_. The\n Journal of Machine Learning Research (2010) vol. 11" - }, - { - "name": "_permutation_test_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Auxiliary function for permutation_test_score" - }, - { - "name": "_shuffle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return a shuffled copy of y eventually shuffle among same groups." - }, - { - "name": "learning_curve", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An object of that type which is cloned for each validation." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target relative to X for classification or regression; None for unsupervised learning." 
- }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a \"Group\" :term:`cv` instance (e.g., :class:`GroupKFold`)." - }, - { - "name": "train_sizes", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Relative or absolute numbers of training examples that will be used to generate the learning curve. If the dtype is float, it is regarded as a fraction of the maximum size of the training set (that is determined by the selected validation method), i.e. it has to be within (0, 1]. Otherwise it is interpreted as absolute sizes of the training sets. Note that for classification the number of samples usually have to be big enough to contain at least one sample from each class." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - int, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." 
- }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A str (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``." - }, - { - "name": "exploit_incremental_learning", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If the estimator supports incremental learning, this will be used to speed up fitting for different training set sizes." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. Training the estimator and computing the score are parallelized over the different training and test sets. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - }, - { - "name": "pre_dispatch", - "type": "Union[str, int]", - "hasDefault": true, - "default": "'all'", - "limitation": null, - "ignored": false, - "docstring": "Number of predispatched jobs for parallel execution (default is all). The option can reduce the allocated memory. The str can be an expression like '2*n_jobs'." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity: the higher, the more messages." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle training data before taking prefixes of it based on``train_sizes``." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used when ``shuffle`` is True. 
Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. .. versionadded:: 0.20" - }, - { - "name": "return_times", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to return the fit and score times." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to pass to the fit method of the estimator. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Learning curve.\n\nDetermines cross-validated training and test scores for different training\nset sizes.\n\nA cross-validation generator splits the whole dataset k times in training\nand test data. Subsets of the training set with varying sizes will be used\nto train the estimator and a score for each training subset size and the\ntest set will be computed. 
Afterwards, the scores will be averaged over\nall k runs for each training subset size.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : object type that implements the \"fit\" and \"predict\" methods\n An object of that type which is cloned for each validation.\n\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\ntrain_sizes : array-like of shape (n_ticks,), default=np.linspace(0.1, 1.0, 5)\n Relative or absolute numbers of training examples that will be used to\n generate the learning curve. If the dtype is float, it is regarded as a\n fraction of the maximum size of the training set (that is determined\n by the selected validation method), i.e. it has to be within (0, 1].\n Otherwise it is interpreted as absolute sizes of the training sets.\n Note that for classification the number of samples usually have to\n be big enough to contain at least one sample from each class.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. 
In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nscoring : str or callable, default=None\n A str (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\nexploit_incremental_learning : bool, default=False\n If the estimator supports incremental learning, this will be\n used to speed up fitting for different training set sizes.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and computing\n the score are parallelized over the different training and test sets.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\npre_dispatch : int or str, default='all'\n Number of predispatched jobs for parallel execution (default is\n all). The option can reduce the allocated memory. The str can\n be an expression like '2*n_jobs'.\n\nverbose : int, default=0\n Controls the verbosity: the higher, the more messages.\n\nshuffle : bool, default=False\n Whether to shuffle training data before taking prefixes of it\n based on``train_sizes``.\n\nrandom_state : int, RandomState instance or None, default=None\n Used when ``shuffle`` is True. Pass an int for reproducible\n output across multiple function calls.\n See :term:`Glossary `.\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\n .. versionadded:: 0.20\n\nreturn_times : bool, default=False\n Whether to return the fit and score times.\n\nfit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\n .. 
versionadded:: 0.24\n\nReturns\n-------\ntrain_sizes_abs : array of shape (n_unique_ticks,)\n Numbers of training examples that has been used to generate the\n learning curve. Note that the number of ticks might be less\n than n_ticks because duplicate entries will be removed.\n\ntrain_scores : array of shape (n_ticks, n_cv_folds)\n Scores on training sets.\n\ntest_scores : array of shape (n_ticks, n_cv_folds)\n Scores on test set.\n\nfit_times : array of shape (n_ticks, n_cv_folds)\n Times spent for fitting in seconds. Only present if ``return_times``\n is True.\n\nscore_times : array of shape (n_ticks, n_cv_folds)\n Times spent for scoring in seconds. Only present if ``return_times``\n is True.\n\nNotes\n-----\nSee :ref:`examples/model_selection/plot_learning_curve.py\n`" - }, - { - "name": "_translate_train_sizes", - "decorators": [], - "parameters": [ - { - "name": "train_sizes", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Numbers of training examples that will be used to generate the learning curve. If the dtype is float, it is regarded as a fraction of 'n_max_training_samples', i.e. it has to be within (0, 1]." - }, - { - "name": "n_max_training_samples", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of training samples (upper bound of 'train_sizes')." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Determine absolute sizes of training subsets and validate 'train_sizes'.\n\nExamples:\n _translate_train_sizes([0.5, 1.0], 10) -> [5, 10]\n _translate_train_sizes([5, 10], 10) -> [5, 10]\n\nParameters\n----------\ntrain_sizes : array-like of shape (n_ticks,)\n Numbers of training examples that will be used to generate the\n learning curve. If the dtype is float, it is regarded as a\n fraction of 'n_max_training_samples', i.e. 
it has to be within (0, 1].\n\nn_max_training_samples : int\n Maximum number of training samples (upper bound of 'train_sizes').\n\nReturns\n-------\ntrain_sizes_abs : array of shape (n_unique_ticks,)\n Numbers of training examples that will be used to generate the\n learning curve. Note that the number of ticks might be less\n than n_ticks because duplicate entries will be removed." - }, - { - "name": "_incremental_fit_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Train estimator on training subsets incrementally and compute scores." - }, - { - "name": "validation_curve", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An object of that type which is cloned for each validation." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target relative to X for classification or regression; None for unsupervised learning." - }, - { - "name": "param_name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of the parameter that will be varied." - }, - { - "name": "param_range", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The values of the parameter that will be evaluated." 
- }, - { - "name": "groups", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a \"Group\" :term:`cv` instance (e.g., :class:`GroupKFold`)." - }, - { - "name": "cv", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - int, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold." - }, - { - "name": "scoring", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A str (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of jobs to run in parallel. Training the estimator and computing the score are parallelized over the combinations of each parameter value and each cross-validation split. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- }, - { - "name": "pre_dispatch", - "type": "Union[str, int]", - "hasDefault": true, - "default": "'all'", - "limitation": null, - "ignored": false, - "docstring": "Number of predispatched jobs for parallel execution (default is all). The option can reduce the allocated memory. The str can be an expression like '2*n_jobs'." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Controls the verbosity: the higher, the more messages." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to pass to the fit method of the estimator. .. versionadded:: 0.24" - }, - { - "name": "error_score", - "type": "Literal['raise']", - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validation curve.\n\nDetermine training and test scores for varying parameter values.\n\nCompute scores for an estimator with different values of a specified\nparameter. This is similar to grid search with one parameter. 
However, this\nwill also compute training scores and is merely a utility for plotting the\nresults.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : object type that implements the \"fit\" and \"predict\" methods\n An object of that type which is cloned for each validation.\n\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\nparam_name : str\n Name of the parameter that will be varied.\n\nparam_range : array-like of shape (n_values,)\n The values of the parameter that will be evaluated.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. 
versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nscoring : str or callable, default=None\n A str (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and computing\n the score are parallelized over the combinations of each parameter\n value and each cross-validation split.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\npre_dispatch : int or str, default='all'\n Number of predispatched jobs for parallel execution (default is\n all). The option can reduce the allocated memory. The str can\n be an expression like '2*n_jobs'.\n\nverbose : int, default=0\n Controls the verbosity: the higher, the more messages.\n\nfit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\n .. versionadded:: 0.24\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\n .. 
versionadded:: 0.20\n\nReturns\n-------\ntrain_scores : array of shape (n_ticks, n_cv_folds)\n Scores on training sets.\n\ntest_scores : array of shape (n_ticks, n_cv_folds)\n Scores on test set.\n\nNotes\n-----\nSee :ref:`sphx_glr_auto_examples_model_selection_plot_validation_curve.py`" - }, - { - "name": "_aggregate_score_dicts", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Aggregate the list of dict to dict of np ndarray\n\nThe aggregated output of _aggregate_score_dicts will be a list of dict\nof form [{'prec': 0.1, 'acc':1.0}, {'prec': 0.1, 'acc':1.0}, ...]\nConvert it to a dict of array {'prec': np.array([0.1 ...]), ...}\n\nParameters\n----------\n\nscores : list of dict\n List of dicts of the scores for all scorers. This is a flat list,\n assumed originally to be of row major order.\n\nExample\n-------\n\n>>> scores = [{'a': 1, 'b':10}, {'a': 2, 'b':2}, {'a': 3, 'b':3},\n... {'a': 10, 'b': 10}] # doctest: +SKIP\n>>> _aggregate_score_dicts(scores) # doctest: +SKIP\n{'a': array([1, 2, 3, 10]),\n 'b': array([10, 2, 3, 10])}" - } - ] - }, - { - "name": "sklearn.model_selection", - "imports": [ - "import typing", - "from _split import BaseCrossValidator", - "from _split import KFold", - "from _split import GroupKFold", - "from _split import StratifiedKFold", - "from _split import TimeSeriesSplit", - "from _split import LeaveOneGroupOut", - "from _split import LeaveOneOut", - "from _split import LeavePGroupsOut", - "from _split import LeavePOut", - "from _split import RepeatedKFold", - "from _split import RepeatedStratifiedKFold", - "from _split import ShuffleSplit", - "from _split import GroupShuffleSplit", - "from _split import StratifiedShuffleSplit", - "from _split import PredefinedSplit", - "from _split import train_test_split", - "from _split import check_cv", - "from _validation import cross_val_score", - "from _validation import cross_val_predict", - "from _validation import cross_validate", - 
"from _validation import learning_curve", - "from _validation import permutation_test_score", - "from _validation import validation_curve", - "from _search import GridSearchCV", - "from _search import RandomizedSearchCV", - "from _search import ParameterGrid", - "from _search import ParameterSampler", - "from _search import fit_grid_point", - "from _search_successive_halving import HalvingGridSearchCV", - "from _search_successive_halving import HalvingRandomSearchCV" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.model_selection.tests.common", - "imports": [ - "import numpy as np", - "from sklearn.model_selection import KFold" - ], - "classes": [ - { - "name": "OneTimeSplitter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Split can be called only once" - }, - { - "name": "get_n_splits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A wrapper to make KFold single entry cv iterator" - } - ], - "functions": [] - }, - { - "name": "sklearn.model_selection.tests.test_search", - "imports": [ - "from collections.abc import Iterable", - "from collections.abc import Sized", - "from io import StringIO", - "from itertools import chain", - "from itertools import product", - "from functools import partial", - "import pickle", - "import sys", - "from types import GeneratorType", - "import re", - "import numpy as np", - "import scipy.sparse as sp", - "import pytest", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_raise_message", - 
"from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import MinimalClassifier", - "from sklearn.utils._testing import MinimalRegressor", - "from sklearn.utils._testing import MinimalTransformer", - "from sklearn.utils._mocking import CheckingClassifier", - "from sklearn.utils._mocking import MockDataFrame", - "from scipy.stats import bernoulli", - "from scipy.stats import expon", - "from scipy.stats import uniform", - "from sklearn.base import BaseEstimator", - "from sklearn.base import ClassifierMixin", - "from sklearn.base import clone", - "from sklearn.base import is_classifier", - "from sklearn.exceptions import NotFittedError", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_blobs", - "from sklearn.datasets import make_multilabel_classification", - "from sklearn.model_selection import fit_grid_point", - "from sklearn.model_selection import train_test_split", - "from sklearn.model_selection import KFold", - "from sklearn.model_selection import StratifiedKFold", - "from sklearn.model_selection import StratifiedShuffleSplit", - "from sklearn.model_selection import LeaveOneGroupOut", - "from sklearn.model_selection import LeavePGroupsOut", - "from sklearn.model_selection import GroupKFold", - "from sklearn.model_selection import GroupShuffleSplit", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.model_selection import RandomizedSearchCV", - "from sklearn.model_selection import ParameterGrid", - "from sklearn.model_selection import ParameterSampler", - "from sklearn.model_selection._search import BaseSearchCV", - "from sklearn.model_selection._validation import FitFailedWarning", - "from sklearn.svm import LinearSVC", - "from 
sklearn.svm import SVC", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.cluster import KMeans", - "from sklearn.neighbors import KernelDensity", - "from sklearn.neighbors import LocalOutlierFactor", - "from sklearn.neighbors import KNeighborsClassifier", - "from sklearn.metrics import f1_score", - "from sklearn.metrics import recall_score", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import make_scorer", - "from sklearn.metrics import roc_auc_score", - "from sklearn.metrics import confusion_matrix", - "from sklearn.metrics import r2_score", - "from sklearn.metrics.pairwise import euclidean_distances", - "from sklearn.impute import SimpleImputer", - "from sklearn.pipeline import Pipeline", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model import SGDClassifier", - "from sklearn.linear_model import LinearRegression", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from sklearn.ensemble import HistGradientBoostingClassifier", - "from sklearn.model_selection.tests.common import OneTimeSplitter", - "from pandas import Series", - "from pandas import DataFrame" - ], - "classes": [ - { - "name": "MockClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier to test the parameter search algorithms" - }, - { - "name": "LinearSVCNoScore", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "An LinearSVC classifier that has no score method." - }, - { - "name": "BrokenClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Broken classifier that cannot be fit twice" - }, - { - "name": "FailingClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Classifier that raises a ValueError on fit()" - } - ], - "functions": [ - { - "name": "assert_grid_iter_equals_getitem", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_validate_parameter_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parameter_grid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_pipeline_steps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_SearchCV_with_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_no_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_score_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_groups", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classes__property", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_trivial_cv_results_attr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_no_refit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_one_grid_point", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_when_param_grid_includes_range", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_bad_param_grid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_sparse_scoring", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_precomputed_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_precomputed_kernel_error_nonsquare", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_refit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_refit_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test refit=callable, which adds flexibility in identifying the\n\"best\" estimator." 
- }, - { - "name": "test_refit_callable_invalid_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test implementation catches the errors when 'best_index_' returns an\ninvalid result." - }, - { - "name": "test_refit_callable_out_bound", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test implementation catches the errors when 'best_index_' returns an\nout of bound result." - }, - { - "name": "test_refit_callable_multi_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test refit=callable in multiple metric evaluation setting" - }, - { - "name": "test_gridsearch_nd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_X_as_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_y_as_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pandas_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unsupervised_grid_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearch_no_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_param_sampler", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cv_results_array_types", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cv_results_keys", - "decorators": [], 
- "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_cv_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_search_cv_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_default_iid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_cv_results_multimetric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_search_cv_results_multimetric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "compare_cv_results_multimetric_with_single", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compare multi-metric cv_results with the ensemble of multiple\nsingle metric cv_results from single metric grid/random search" - }, - { - "name": "compare_refit_methods_when_refit_with_acc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compare refit multi-metric search methods with single metric methods" - }, - { - "name": "test_search_cv_score_samples_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_cv_score_samples_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_cv_results_rank_tie_breaking", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_search_cv_results_none_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_cv_timing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_correct_score_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_grid_point", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_grid_point_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_with_multioutput_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_proba_disabled", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_allows_nans", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_failing_classifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_failing_classifier_raise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parameters_sampler_replacement", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stochastic_gradient_loss_param", - "decorators": [], 
- "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_train_scores_set_to_false", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_grid_search_cv_splits_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_inverse_transform_round_trip", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_custom_run_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test__custom_fit_no_run_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_empty_cv_iterator_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_search_bad_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_searchcv_raise_warning_with_non_finite_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_multimetric_confusion_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_multimetric_same_as_list_of_strings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_single_metric_same_as_single_string", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_callable_multimetric_error_on_invalid_key", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_multimetric_error_failing_clf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_multimetric_clf_all_fails", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_cv_pairwise_property_delegated_to_base_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test implementation of BaseSearchCV has the pairwise tag\nwhich matches the pairwise tag of its estimator.\nThis test make sure pairwise tag is delegated to the base estimator.\n\nNon-regression test for issue #13920." - }, - { - "name": "test_search_cv__pairwise_property_delegated_to_base_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test implementation of BaseSearchCV has the _pairwise property\nwhich matches the _pairwise property of its estimator.\nThis test make sure _pairwise is delegated to the base estimator.\n\nNon-regression test for issue #13920." - }, - { - "name": "test_search_cv_pairwise_property_equivalence_of_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test implementation of BaseSearchCV has the pairwise tag\nwhich matches the pairwise tag of its estimator.\nThis test ensures the equivalence of 'precomputed'.\n\nNon-regression test for issue #13920." 
- }, - { - "name": "test_scalar_fit_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scalar_fit_param_compat", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_cv_using_minimal_compatible_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.model_selection.tests.test_split", - "imports": [ - "import warnings", - "import pytest", - "import numpy as np", - "from scipy.sparse import coo_matrix", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from scipy import stats", - "from scipy.special import comb", - "from itertools import combinations", - "from itertools import combinations_with_replacement", - "from itertools import permutations", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regexp", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils.validation import _num_samples", - "from sklearn.utils._mocking import MockDataFrame", - "from sklearn.model_selection import cross_val_score", - "from sklearn.model_selection import KFold", - "from sklearn.model_selection import StratifiedKFold", - "from sklearn.model_selection import GroupKFold", - "from sklearn.model_selection import TimeSeriesSplit", - "from sklearn.model_selection import LeaveOneOut", - "from sklearn.model_selection import LeaveOneGroupOut", - "from sklearn.model_selection import LeavePOut", - "from 
sklearn.model_selection import LeavePGroupsOut", - "from sklearn.model_selection import ShuffleSplit", - "from sklearn.model_selection import GroupShuffleSplit", - "from sklearn.model_selection import StratifiedShuffleSplit", - "from sklearn.model_selection import PredefinedSplit", - "from sklearn.model_selection import check_cv", - "from sklearn.model_selection import train_test_split", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.model_selection import RepeatedKFold", - "from sklearn.model_selection import RepeatedStratifiedKFold", - "from sklearn.linear_model import Ridge", - "from sklearn.model_selection._split import _validate_shuffle_split", - "from sklearn.model_selection._split import _build_repr", - "from sklearn.model_selection._split import _yields_constant_splits", - "from sklearn.datasets import load_digits", - "from sklearn.datasets import make_classification", - "from sklearn.svm import SVC", - "from pandas import DataFrame" - ], - "classes": [], - "functions": [ - { - "name": "test_cross_validator_with_default_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_2d_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_valid_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cv_coverage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kfold_valueerrors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kfold_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kfold_no_shuffle", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_kfold_no_shuffle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_kfold_ratios", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_kfold_label_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kfold_balance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratifiedkfold_balance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle_kfold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle_kfold_stratifiedkfold_reproducibility", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle_stratifiedkfold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kfold_can_detect_dependent_samples_on_digits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle_split_default_test_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_group_shuffle_split_default_test_size", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_shuffle_split_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_shuffle_split_respects_test_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_shuffle_split_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_shuffle_split_even", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_shuffle_split_overlap_train_test_bug", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_shuffle_split_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_shuffle_split_multilabel_many_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predefinedsplit_with_kfold_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_group_shuffle_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_leave_one_p_group_out", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_leave_group_out_changing_groups", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_leave_one_p_group_out_error_on_fewer_number_of_groups", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_repeated_cv_value_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_repeated_cv_repr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_repeated_kfold_determinstic_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_n_splits_for_repeated_kfold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_n_splits_for_repeated_stratified_kfold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_repeated_stratified_kfold_determinstic_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_invalid_sizes1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_invalid_sizes2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_default_test_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_train_test_split_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_mock_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_list_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shufflesplit_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shufflesplit_reproducible", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratifiedshufflesplit_list_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_allow_nans", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cv_iterable_wrapper", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_group_kfold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_time_series_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_time_series_max_train_size", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_time_series_max_train_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_time_series_test_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_time_series_gap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nested_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_build_repr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle_split_empty_trainset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_train_test_split_empty_trainset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_leave_one_out_empty_trainset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_leave_p_out_empty_trainset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_state_shuffle_false", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_yields_constant_splits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.model_selection.tests.test_successive_halving", - "imports": [ - "from math import ceil", - "import pytest", - "from scipy.stats import 
norm", - "from scipy.stats import randint", - "import numpy as np", - "from sklearn.datasets import make_classification", - "from sklearn.dummy import DummyClassifier", - "from sklearn.experimental import enable_halving_search_cv", - "from sklearn.model_selection import HalvingGridSearchCV", - "from sklearn.model_selection import HalvingRandomSearchCV", - "from sklearn.model_selection import KFold", - "from sklearn.model_selection import ShuffleSplit", - "from sklearn.model_selection._search_successive_halving import _SubsampleMetaSplitter", - "from sklearn.model_selection._search_successive_halving import _top_k", - "from sklearn.model_selection._search_successive_halving import _refit_callable" - ], - "classes": [ - { - "name": "FastClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier that accepts parameters a, b, ... z.\n\nThese parameter don't affect the predictions and are useful for fast\ngrid searching." 
- } - ], - "functions": [ - { - "name": "test_aggressive_elimination", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_max_resources", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_iterations", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_resource_parameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_search", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_search_discrete_distributions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_errors_randomized", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_subsample_splitter_shapes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_subsample_splitter_determinism", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_top_k", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_refit_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cv_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_base_estimator_inputs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.model_selection.tests.test_validation", - "imports": [ - "import os", - "import re", - "import sys", - "import tempfile", - "import warnings", - "from functools import partial", - "from time import sleep", - "import pytest", - "import numpy as np", - "from scipy.sparse import coo_matrix", - "from scipy.sparse import csr_matrix", - "from sklearn.exceptions import FitFailedWarning", - "from sklearn.model_selection.tests.test_search import FailingClassifier", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._mocking import CheckingClassifier", - "from sklearn.utils._mocking import MockDataFrame", - "from sklearn.utils.validation import _num_samples", - "from sklearn.model_selection import cross_val_score", - "from sklearn.model_selection import ShuffleSplit", - "from sklearn.model_selection import cross_val_predict", - "from sklearn.model_selection import cross_validate", - "from sklearn.model_selection import permutation_test_score", - "from sklearn.model_selection import KFold", - "from sklearn.model_selection import StratifiedKFold", - "from sklearn.model_selection import LeaveOneOut", - "from sklearn.model_selection import LeaveOneGroupOut", - "from sklearn.model_selection import LeavePGroupsOut", - "from sklearn.model_selection import 
GroupKFold", - "from sklearn.model_selection import GroupShuffleSplit", - "from sklearn.model_selection import learning_curve", - "from sklearn.model_selection import validation_curve", - "from sklearn.model_selection._validation import _check_is_permutation", - "from sklearn.model_selection._validation import _fit_and_score", - "from sklearn.model_selection._validation import _score", - "from sklearn.datasets import make_regression", - "from sklearn.datasets import load_diabetes", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import load_digits", - "from sklearn.metrics import explained_variance_score", - "from sklearn.metrics import make_scorer", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import confusion_matrix", - "from sklearn.metrics import precision_recall_fscore_support", - "from sklearn.metrics import precision_score", - "from sklearn.metrics import r2_score", - "from sklearn.metrics import mean_squared_error", - "from sklearn.metrics import check_scoring", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import SGDClassifier", - "from sklearn.linear_model import PassiveAggressiveClassifier", - "from sklearn.linear_model import RidgeClassifier", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.neighbors import KNeighborsClassifier", - "from sklearn.svm import SVC", - "from sklearn.svm import LinearSVC", - "from sklearn.cluster import KMeans", - "from sklearn.impute import SimpleImputer", - "from sklearn.preprocessing import LabelEncoder", - "from sklearn.pipeline import Pipeline", - "from io import StringIO", - "from sklearn.base import BaseEstimator", - "from sklearn.base import clone", - "from sklearn.multiclass import OneVsRestClassifier", - "from sklearn.utils import shuffle", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_multilabel_classification", - 
"from sklearn.model_selection.tests.common import OneTimeSplitter", - "from sklearn.model_selection import GridSearchCV", - "from pandas import Series", - "from pandas import DataFrame" - ], - "classes": [ - { - "name": "MockImprovingEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_training_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier to test the learning curve" - }, - { - "name": "MockIncrementalImprovingEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_training_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier that provides partial_fit" - }, - { - "name": "MockEstimatorWithParameter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_training_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier to test the validation curve" - }, - { - "name": "MockEstimatorWithSingleFitCallAllowed", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier that disallows repeated calls of fit method" - }, - { - "name": "MockClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The dummy arguments are to test that this fit function can\naccept non-array arguments through cross-validation, such as:\n - int\n - str (this is actually array-like)\n - object\n - function" - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier to test the cross-validation" - }, - { - "name": "RFWithDecisionFunction", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_cross_val_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_validate_many_jobs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_validate_invalid_scoring_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_validate_nested_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_validate", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cross_validate_single_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cross_validate_multi_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_predict_groups", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_score_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_with_score_func_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_with_score_func_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_test_score_allow_nans", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_test_score_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_allow_nans", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_decision_function_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_predict_proba_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_predict_log_proba_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_input_types", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_unbalanced", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_y_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_sparse_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_unsupervised", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_incremental_learning_not_possible", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_incremental_learning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_incremental_learning_unsupervised", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_batch_and_incremental_learning_are_equal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_n_sample_range_out_of_bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_remove_duplicate_sample_sizes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_with_boolean_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_with_shuffle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_curve_incremental_learning_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "test_validation_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_validation_curve_clone_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_validation_curve_cv_splits_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_validation_curve_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_is_permutation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_sparse_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cross_val_predict_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper for tests of cross_val_predict with binary classification" - }, - { - "name": "check_cross_val_predict_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper for tests of cross_val_predict with multiclass classification" - }, - { - "name": "check_cross_val_predict_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the output of cross_val_predict for 2D targets using\nEstimators which provide a predictions as a list with one\nelement per class." 
- }, - { - "name": "check_cross_val_predict_with_method_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cross_val_predict_with_method_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_with_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_method_checking", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearchcv_cross_val_predict_with_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_with_method_multilabel_ovr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_with_method_multilabel_rf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_with_method_rare_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_with_method_multilabel_rf_rare_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_expected_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_predict_class_subset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_memmap", - "decorators": [], - "parameters": [], 
- "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_permutation_test_score_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_and_score_failing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_and_score_working", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_failing_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_val_score_failing_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cross_validate_failing_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "three_params_scorer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_and_score_verbosity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_multimetric_confusion_matrix_cross_validate", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_validation_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.model_selection.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.neighbors.setup", - "imports": 
[ - "import os", - "import numpy", - "from numpy.distutils.misc_util import Configuration" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors._base", - "imports": [ - "from functools import partial", - "import warnings", - "from abc import ABCMeta", - "from abc import abstractmethod", - "import numbers", - "import numpy as np", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import issparse", - "import joblib", - "from joblib import Parallel", - "from joblib import effective_n_jobs", - "from _ball_tree import BallTree", - "from _kd_tree import KDTree", - "from base import BaseEstimator", - "from base import MultiOutputMixin", - "from base import is_classifier", - "from metrics import pairwise_distances_chunked", - "from metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS", - "from utils import check_array", - "from utils import gen_even_slices", - "from utils import _to_object_array", - "from utils.deprecation import deprecated", - "from utils.multiclass import check_classification_targets", - "from utils.validation import check_is_fitted", - "from utils.validation import check_non_negative", - "from utils.fixes import delayed", - "from utils.fixes import parse_version", - "from exceptions import DataConversionWarning", - "from exceptions import EfficiencyWarning" - ], - "classes": [ - { - "name": "NeighborsBase", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_algorithm_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for nearest neighbors estimators." - }, - { - "name": "KNeighborsMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_kneighbors_reduce_func", - "decorators": [], - "parameters": [ - { - "name": "dist", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The distance matrix." - }, - { - "name": "start", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index in X which the first row of dist corresponds to." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors required for each sample." - }, - { - "name": "return_distance", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the distances." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reduce a chunk of distances to the nearest neighbors\n\nCallback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`\n\nParameters\n----------\ndist : ndarray of shape (n_samples_chunk, n_samples)\n The distance matrix.\n\nstart : int\n The index in X which the first row of dist corresponds to.\n\nn_neighbors : int\n Number of neighbors required for each sample.\n\nreturn_distance : bool\n Whether or not to return the distances.\n\nReturns\n-------\ndist : array of shape (n_samples_chunk, n_neighbors)\n Returned only if `return_distance=True`.\n\nneigh : array of shape (n_samples_chunk, n_neighbors)\n The neighbors indices." - }, - { - "name": "kneighbors", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query point or points. If not provided, neighbors of each indexed point are returned. In this case, the query point is not considered its own neighbor." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors required for each sample. The default is the value passed to the constructor." - }, - { - "name": "return_distance", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the distances." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Finds the K-neighbors of a point.\n\nReturns indices of and distances to the neighbors of each point.\n\nParameters\n----------\nX : array-like, shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed', default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n\nn_neighbors : int, default=None\n Number of neighbors required for each sample. The default is the\n value passed to the constructor.\n\nreturn_distance : bool, default=True\n Whether or not to return the distances.\n\nReturns\n-------\nneigh_dist : ndarray of shape (n_queries, n_neighbors)\n Array representing the lengths to points, only present if\n return_distance=True\n\nneigh_ind : ndarray of shape (n_queries, n_neighbors)\n Indices of the nearest points in the population matrix.\n\nExamples\n--------\nIn the following example, we construct a NearestNeighbors\nclass from an array representing our data set and ask who's\nthe closest point to [1,1,1]\n\n>>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(n_neighbors=1)\n>>> neigh.fit(samples)\nNearestNeighbors(n_neighbors=1)\n>>> print(neigh.kneighbors([[1., 1., 1.]]))\n(array([[0.5]]), array([[2]]))\n\nAs you can see, it returns [[0.5]], and [[2]], which means that the\nelement is at distance 0.5 and is the third element of samples\n(indexes start at 0). You can also query for multiple points:\n\n>>> X = [[0., 1., 0.], [1., 0., 1.]]\n>>> neigh.kneighbors(X, return_distance=False)\narray([[1],\n [2]]...)" - }, - { - "name": "kneighbors_graph", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query point or points. 
If not provided, neighbors of each indexed point are returned. In this case, the query point is not considered its own neighbor. For ``metric='precomputed'`` the shape should be (n_queries, n_indexed). Otherwise the shape should be (n_queries, n_features)." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors for each sample. The default is the value passed to the constructor." - }, - { - "name": "mode", - "type": "Literal['connectivity', 'distance']", - "hasDefault": true, - "default": "'connectivity'", - "limitation": null, - "ignored": false, - "docstring": "Type of returned matrix: 'connectivity' will return the connectivity matrix with ones and zeros, in 'distance' the edges are Euclidean distance between points." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Computes the (weighted) graph of k-Neighbors for points in X\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed', default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n For ``metric='precomputed'`` the shape should be\n (n_queries, n_indexed). Otherwise the shape should be\n (n_queries, n_features).\n\nn_neighbors : int, default=None\n Number of neighbors for each sample. 
The default is the value\n passed to the constructor.\n\nmode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the\n connectivity matrix with ones and zeros, in 'distance' the\n edges are Euclidean distance between points.\n\nReturns\n-------\nA : sparse-matrix of shape (n_queries, n_samples_fit)\n `n_samples_fit` is the number of samples in the fitted data\n `A[i, j]` is assigned the weight of edge that connects `i` to `j`.\n The matrix is of CSR format.\n\nExamples\n--------\n>>> X = [[0], [3], [1]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(n_neighbors=2)\n>>> neigh.fit(X)\nNearestNeighbors(n_neighbors=2)\n>>> A = neigh.kneighbors_graph(X)\n>>> A.toarray()\narray([[1., 0., 1.],\n [0., 1., 1.],\n [1., 0., 1.]])\n\nSee Also\n--------\nNearestNeighbors.radius_neighbors_graph" - } - ], - "docstring": "Mixin for k-neighbors searches" - }, - { - "name": "RadiusNeighborsMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_radius_neighbors_reduce_func", - "decorators": [], - "parameters": [ - { - "name": "dist", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The distance matrix." - }, - { - "name": "start", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The index in X which the first row of dist corresponds to." - }, - { - "name": "radius", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The radius considered when making the nearest neighbors search." 
- }, - { - "name": "return_distance", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the distances." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reduce a chunk of distances to the nearest neighbors\n\nCallback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`\n\nParameters\n----------\ndist : ndarray of shape (n_samples_chunk, n_samples)\n The distance matrix.\n\nstart : int\n The index in X which the first row of dist corresponds to.\n\nradius : float\n The radius considered when making the nearest neighbors search.\n\nreturn_distance : bool\n Whether or not to return the distances.\n\nReturns\n-------\ndist : list of ndarray of shape (n_samples_chunk,)\n Returned only if `return_distance=True`.\n\nneigh : list of ndarray of shape (n_samples_chunk,)\n The neighbors indices." - }, - { - "name": "radius_neighbors", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query point or points. If not provided, neighbors of each indexed point are returned. In this case, the query point is not considered its own neighbor." - }, - { - "name": "radius", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Limiting distance of neighbors to return. The default is the value passed to the constructor." - }, - { - "name": "return_distance", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the distances." - }, - { - "name": "sort_results", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the distances and indices will be sorted by increasing distances before being returned. 
If False, the results may not be sorted. If `return_distance=False`, setting `sort_results=True` will result in an error. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Finds the neighbors within a given radius of a point or points.\n\nReturn the indices and distances of each point from the dataset\nlying in a ball with size ``radius`` around the points of the query\narray. Points lying on the boundary are included in the results.\n\nThe result points are *not* necessarily sorted by distance to their\nquery point.\n\nParameters\n----------\nX : array-like of (n_samples, n_features), default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n\nradius : float, default=None\n Limiting distance of neighbors to return. The default is the value\n passed to the constructor.\n\nreturn_distance : bool, default=True\n Whether or not to return the distances.\n\nsort_results : bool, default=False\n If True, the distances and indices will be sorted by increasing\n distances before being returned. If False, the results may not\n be sorted. If `return_distance=False`, setting `sort_results=True`\n will result in an error.\n\n .. versionadded:: 0.22\n\nReturns\n-------\nneigh_dist : ndarray of shape (n_samples,) of arrays\n Array representing the distances to each point, only present if\n `return_distance=True`. 
The distance values are computed according\n to the ``metric`` constructor parameter.\n\nneigh_ind : ndarray of shape (n_samples,) of arrays\n An array of arrays of indices of the approximate nearest points\n from the population matrix that lie within a ball of size\n ``radius`` around the query points.\n\nExamples\n--------\nIn the following example, we construct a NeighborsClassifier\nclass from an array representing our data set and ask who's\nthe closest point to [1, 1, 1]:\n\n>>> import numpy as np\n>>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(radius=1.6)\n>>> neigh.fit(samples)\nNearestNeighbors(radius=1.6)\n>>> rng = neigh.radius_neighbors([[1., 1., 1.]])\n>>> print(np.asarray(rng[0][0]))\n[1.5 0.5]\n>>> print(np.asarray(rng[1][0]))\n[1 2]\n\nThe first array returned contains the distances to all points which\nare closer than 1.6, while the second array returned contains their\nindices. In general, multiple points can be queried at the same time.\n\nNotes\n-----\nBecause the number of neighbors of each point is not necessarily\nequal, the results for multiple query points cannot be fit in a\nstandard data array.\nFor efficiency, `radius_neighbors` returns arrays of objects, where\neach object is a 1D array of indices or distances." - }, - { - "name": "radius_neighbors_graph", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query point or points. If not provided, neighbors of each indexed point are returned. In this case, the query point is not considered its own neighbor." - }, - { - "name": "radius", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Radius of neighborhoods. The default is the value passed to the constructor." 
- }, - { - "name": "mode", - "type": "Literal['connectivity', 'distance']", - "hasDefault": true, - "default": "'connectivity'", - "limitation": null, - "ignored": false, - "docstring": "Type of returned matrix: 'connectivity' will return the connectivity matrix with ones and zeros, in 'distance' the edges are Euclidean distance between points." - }, - { - "name": "sort_results", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, in each row of the result, the non-zero entries will be sorted by increasing distances. If False, the non-zero entries may not be sorted. Only used with mode='distance'. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Computes the (weighted) graph of Neighbors for points in X\n\nNeighborhoods are restricted the points at a distance lower than\nradius.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features), default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n\nradius : float, default=None\n Radius of neighborhoods. The default is the value passed to the\n constructor.\n\nmode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the\n connectivity matrix with ones and zeros, in 'distance' the\n edges are Euclidean distance between points.\n\nsort_results : bool, default=False\n If True, in each row of the result, the non-zero entries will be\n sorted by increasing distances. If False, the non-zero entries may\n not be sorted. Only used with mode='distance'.\n\n .. 
versionadded:: 0.22\n\nReturns\n-------\nA : sparse-matrix of shape (n_queries, n_samples_fit)\n `n_samples_fit` is the number of samples in the fitted data\n `A[i, j]` is assigned the weight of edge that connects `i` to `j`.\n The matrix if of format CSR.\n\nExamples\n--------\n>>> X = [[0], [3], [1]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(radius=1.5)\n>>> neigh.fit(X)\nNearestNeighbors(radius=1.5)\n>>> A = neigh.radius_neighbors_graph(X)\n>>> A.toarray()\narray([[1., 0., 1.],\n [0., 1., 0.],\n [1., 0., 1.]])\n\nSee Also\n--------\nkneighbors_graph" - } - ], - "docstring": "Mixin for radius-based neighbors searches" - } - ], - "functions": [ - { - "name": "_check_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check to make sure weights are valid" - }, - { - "name": "_get_weights", - "decorators": [], - "parameters": [ - { - "name": "dist", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input distances." - }, - { - "name": "weights", - "type": "Literal['uniform', 'distance']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The kind of weighting used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get the weights from an array of distances and a parameter ``weights``\n\nParameters\n----------\ndist : ndarray\n The input distances.\n\nweights : {'uniform', 'distance' or a callable}\n The kind of weighting used.\n\nReturns\n-------\nweights_arr : array of the same shape as ``dist``\n If ``weights == 'uniform'``, then returns None." 
- }, - { - "name": "_is_sorted_by_data", - "decorators": [], - "parameters": [ - { - "name": "graph", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Neighbors graph as given by `kneighbors_graph` or `radius_neighbors_graph`. Matrix should be of format CSR format." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the graph's non-zero entries are sorted by data\n\nThe non-zero entries are stored in graph.data and graph.indices.\nFor each row (or sample), the non-zero entries can be either:\n - sorted by indices, as after graph.sort_indices();\n - sorted by data, as after _check_precomputed(graph);\n - not sorted.\n\nParameters\n----------\ngraph : sparse matrix of shape (n_samples, n_samples)\n Neighbors graph as given by `kneighbors_graph` or\n `radius_neighbors_graph`. Matrix should be of format CSR format.\n\nReturns\n-------\nres : bool\n Whether input graph is sorted by data." - }, - { - "name": "_check_precomputed", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Distance matrix to other samples. X may be a sparse matrix, in which case only non-zero elements may be considered neighbors." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check precomputed distance matrix\n\nIf the precomputed distance matrix is sparse, it checks that the non-zero\nentries are sorted by distances. If not, the matrix is copied and sorted.\n\nParameters\n----------\nX : {sparse matrix, array-like}, (n_samples, n_samples)\n Distance matrix to other samples. X may be a sparse matrix, in which\n case only non-zero elements may be considered neighbors.\n\nReturns\n-------\nX : {sparse matrix, array-like}, (n_samples, n_samples)\n Distance matrix to other samples. 
X may be a sparse matrix, in which\n case only non-zero elements may be considered neighbors." - }, - { - "name": "_kneighbors_from_graph", - "decorators": [], - "parameters": [ - { - "name": "graph", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Neighbors graph as given by `kneighbors_graph` or `radius_neighbors_graph`. Matrix should be of format CSR format." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors required for each sample." - }, - { - "name": "return_distance", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the distances." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decompose a nearest neighbors sparse graph into distances and indices\n\nParameters\n----------\ngraph : sparse matrix of shape (n_samples, n_samples)\n Neighbors graph as given by `kneighbors_graph` or\n `radius_neighbors_graph`. Matrix should be of format CSR format.\n\nn_neighbors : int\n Number of neighbors required for each sample.\n\nreturn_distance : bool\n Whether or not to return the distances.\n\nReturns\n-------\nneigh_dist : ndarray of shape (n_samples, n_neighbors)\n Distances to nearest neighbors. Only present if `return_distance=True`.\n\nneigh_ind : ndarray of shape (n_samples, n_neighbors)\n Indices of nearest neighbors." - }, - { - "name": "_radius_neighbors_from_graph", - "decorators": [], - "parameters": [ - { - "name": "graph", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Neighbors graph as given by `kneighbors_graph` or `radius_neighbors_graph`. Matrix should be of format CSR format." 
- }, - { - "name": "radius", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Radius of neighborhoods which should be strictly positive." - }, - { - "name": "return_distance", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to return the distances." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decompose a nearest neighbors sparse graph into distances and indices\n\nParameters\n----------\ngraph : sparse matrix of shape (n_samples, n_samples)\n Neighbors graph as given by `kneighbors_graph` or\n `radius_neighbors_graph`. Matrix should be of format CSR format.\n\nradius : float\n Radius of neighborhoods which should be strictly positive.\n\nreturn_distance : bool\n Whether or not to return the distances.\n\nReturns\n-------\nneigh_dist : ndarray of shape (n_samples,) of arrays\n Distances to nearest neighbors. Only present if `return_distance=True`.\n\nneigh_ind : ndarray of shape (n_samples,) of arrays\n Indices of nearest neighbors." - }, - { - "name": "_tree_query_parallel_helper", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper for the Parallel calls in KNeighborsMixin.kneighbors\n\nThe Cython method tree.query is not directly picklable by cloudpickle\nunder PyPy." - }, - { - "name": "_tree_query_radius_parallel_helper", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper for the Parallel calls in RadiusNeighborsMixin.radius_neighbors\n\nThe Cython method tree.query_radius is not directly picklable by\ncloudpickle under PyPy." 
- } - ] - }, - { - "name": "sklearn.neighbors._classification", - "imports": [ - "import numpy as np", - "from scipy import stats", - "from utils.extmath import weighted_mode", - "from utils.validation import _is_arraylike", - "from utils.validation import _num_samples", - "import warnings", - "from _base import _check_weights", - "from _base import _get_weights", - "from _base import NeighborsBase", - "from _base import KNeighborsMixin", - "from _base import RadiusNeighborsMixin", - "from base import ClassifierMixin", - "from utils import check_array", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "KNeighborsClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to use by default for :meth:`kneighbors` queries." - }, - { - "name": "weights", - "type": "Literal['uniform', 'distance']", - "hasDefault": true, - "default": "'uniform'", - "limitation": null, - "ignored": false, - "docstring": "weight function used in prediction. Possible values: - 'uniform' : uniform weights. All points in each neighborhood are weighted equally. - 'distance' : weight points by the inverse of their distance. in this case, closer neighbors of a query point will have a greater influence than neighbors which are further away. - [callable] : a user-defined function which accepts an array of distances, and returns an array of the same shape containing the weights." 
- }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "the distance metric to use for the tree. The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean metric. See the documentation of :class:`DistanceMetric` for a list of available metrics. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square during fit. X may be a :term:`sparse graph`, in which case only \"nonzero\" elements may be considered neighbors." 
- }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. Doesn't affect :meth:`fit` method." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the k-nearest neighbors classifier from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nReturns\n-------\nself : KNeighborsClassifier\n The fitted k-nearest neighbors classifier." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the class labels for the provided data.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n Class labels for each data sample." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return probability estimates for the test data X.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\np : ndarray of shape (n_queries, n_classes), or a list of n_outputs\n of such arrays if n_outputs > 1.\n The class probabilities of the input samples. Classes are ordered\n by lexicographic order." - } - ], - "docstring": "Classifier implementing the k-nearest neighbors vote.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_neighbors : int, default=5\n Number of neighbors to use by default for :meth:`kneighbors` queries.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. 
All points in each neighborhood\n are weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\np : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. 
X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n Doesn't affect :meth:`fit` method.\n\nAttributes\n----------\nclasses_ : array of shape (n_classes,)\n Class labels known to the classifier\n\neffective_metric_ : str or callble\n The distance metric used. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\noutputs_2d_ : bool\n False when `y`'s shape is (n_samples, ) or (n_samples, 1) during fit\n otherwise True.\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import KNeighborsClassifier\n>>> neigh = KNeighborsClassifier(n_neighbors=3)\n>>> neigh.fit(X, y)\nKNeighborsClassifier(...)\n>>> print(neigh.predict([[1.1]]))\n[0]\n>>> print(neigh.predict_proba([[0.9]]))\n[[0.66666667 0.33333333]]\n\nSee Also\n--------\nRadiusNeighborsClassifier\nKNeighborsRegressor\nRadiusNeighborsRegressor\nNearestNeighbors\n\nNotes\n-----\nSee :ref:`Nearest Neighbors ` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n.. 
warning::\n\n Regarding the Nearest Neighbors algorithms, if it is found that two\n neighbors, neighbor `k+1` and `k`, have identical distances\n but different labels, the results will depend on the ordering of the\n training data.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm" - }, - { - "name": "RadiusNeighborsClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "radius", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Range of parameter space to use by default for :meth:`radius_neighbors` queries." - }, - { - "name": "weights", - "type": "Literal['uniform', 'distance']", - "hasDefault": true, - "default": "'uniform'", - "limitation": null, - "ignored": false, - "docstring": "weight function used in prediction. Possible values: - 'uniform' : uniform weights. All points in each neighborhood are weighted equally. - 'distance' : weight points by the inverse of their distance. in this case, closer neighbors of a query point will have a greater influence than neighbors which are further away. - [callable] : a user-defined function which accepts an array of distances, and returns an array of the same shape containing the weights. Uniform weights are used by default." - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force." 
- }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "the distance metric to use for the tree. The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean metric. See the documentation of :class:`DistanceMetric` for a list of available metrics. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square during fit. X may be a :term:`sparse graph`, in which case only \"nonzero\" elements may be considered neighbors." - }, - { - "name": "outlier_label", - "type": "Literal['most_frequent']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "label for outlier samples (samples with no neighbors in given radius). - manual label: str or int label (should be the same type as y) or list of manual labels if multi-output is used. - 'most_frequent' : assign the most frequent label of y to outliers. - None : when any outlier is detected, ValueError will be raised." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." 
- }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the radius neighbors classifier from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nReturns\n-------\nself : RadiusNeighborsClassifier\n The fitted radius neighbors classifier." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the class labels for the provided data.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n Class labels for each data sample." 
- }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return probability estimates for the test data X.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\np : ndarray of shape (n_queries, n_classes), or a list of n_outputs\n of such arrays if n_outputs > 1.\n The class probabilities of the input samples. Classes are ordered\n by lexicographic order." - } - ], - "docstring": "Classifier implementing a vote among neighbors within a given radius\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nradius : float, default=1.0\n Range of parameter space to use by default for :meth:`radius_neighbors`\n queries.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. 
All points in each neighborhood\n are weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\n Uniform weights are used by default.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\np : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. 
X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\noutlier_label : {manual label, 'most_frequent'}, default=None\n label for outlier samples (samples with no neighbors in given radius).\n\n - manual label: str or int label (should be the same type as y)\n or list of manual labels if multi-output is used.\n - 'most_frequent' : assign the most frequent label of y to outliers.\n - None : when any outlier is detected, ValueError will be raised.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier.\n\neffective_metric_ : str or callable\n The distance metric used. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. 
For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\noutlier_label_ : int or array-like of shape (n_class,)\n Label which is given for outlier samples (samples with no neighbors\n on given radius).\n\noutputs_2d_ : bool\n False when `y`'s shape is (n_samples, ) or (n_samples, 1) during fit\n otherwise True.\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import RadiusNeighborsClassifier\n>>> neigh = RadiusNeighborsClassifier(radius=1.0)\n>>> neigh.fit(X, y)\nRadiusNeighborsClassifier(...)\n>>> print(neigh.predict([[1.5]]))\n[0]\n>>> print(neigh.predict_proba([[1.0]]))\n[[0.66666667 0.33333333]]\n\nSee Also\n--------\nKNeighborsClassifier\nRadiusNeighborsRegressor\nKNeighborsRegressor\nNearestNeighbors\n\nNotes\n-----\nSee :ref:`Nearest Neighbors ` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm" - } - ], - "functions": [] - }, - { - "name": "sklearn.neighbors._graph", - "imports": [ - "from _base import KNeighborsMixin", - "from _base import RadiusNeighborsMixin", - "from _base import NeighborsBase", - "from _unsupervised import NearestNeighbors", - "from base import TransformerMixin", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "KNeighborsTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "mode", - "type": "Literal['distance', 'connectivity']", - "hasDefault": true, - "default": "'distance'", - "limitation": null, - "ignored": false, - "docstring": "Type of returned matrix: 'connectivity' will return the connectivity matrix 
with ones and zeros, and 'distance' will return the distances between neighbors according to the given metric." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors for each sample in the transformed sparse graph. For compatibility reasons, as each sample is considered as its own neighbor, one extra neighbor will be computed when mode == 'distance'. In this case, the sparse graph contains (n_neighbors + 1) neighbors." - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. If metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays as input and return one value indicating the distance between them. 
This works for Scipy's metrics, but is less efficient than passing the metric name as a string. Distance matrices are not supported. Valid values for metric are: - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'] - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'] See the documentation for scipy.spatial.distance for details on these metrics." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Parameter for the Minkowski metric from sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. If ``-1``, then the number of jobs is set to the number of CPU cores." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the k-nearest neighbors transformer from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\nReturns\n-------\nself : KNeighborsTransformer\n The fitted k-nearest neighbors transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample data." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Computes the (weighted) graph of Neighbors for points in X\n\nParameters\n----------\nX : array-like of shape (n_samples_transform, n_features)\n Sample data.\n\nReturns\n-------\nXt : sparse matrix of shape (n_samples_transform, n_samples_fit)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training set." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit to data, then transform it.\n\nFits transformer to X and y with optional parameters fit_params\nand returns a transformed version of X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training set.\n\ny : ignored\n\nReturns\n-------\nXt : sparse matrix of shape (n_samples, n_samples)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transform X into a (weighted) graph of k nearest neighbors\n\nThe transformed data is a sparse graph as returned by kneighbors_graph.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.22\n\nParameters\n----------\nmode : {'distance', 'connectivity'}, default='distance'\n Type of returned matrix: 'connectivity' will return the connectivity\n matrix with ones and zeros, and 'distance' will return the distances\n between neighbors according to the given metric.\n\nn_neighbors : int, default=5\n Number of neighbors for each sample in the transformed sparse graph.\n For compatibility reasons, as each sample is considered as its own\n neighbor, one extra neighbor will be computed when mode == 'distance'.\n In this case, the sparse graph contains (n_neighbors + 1) neighbors.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nmetric : str or callable, default='minkowski'\n metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. 
This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\np : int, default=2\n Parameter for the Minkowski metric from\n sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=1\n The number of parallel jobs to run for neighbors search.\n If ``-1``, then the number of jobs is set to the number of CPU cores.\n\nAttributes\n----------\neffective_metric_ : str or callable\n The distance metric used. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\nExamples\n--------\n>>> from sklearn.manifold import Isomap\n>>> from sklearn.neighbors import KNeighborsTransformer\n>>> from sklearn.pipeline import make_pipeline\n>>> estimator = make_pipeline(\n... KNeighborsTransformer(n_neighbors=5, mode='distance'),\n... 
Isomap(neighbors_algorithm='precomputed'))" - }, - { - "name": "RadiusNeighborsTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "mode", - "type": "Literal['distance', 'connectivity']", - "hasDefault": true, - "default": "'distance'", - "limitation": null, - "ignored": false, - "docstring": "Type of returned matrix: 'connectivity' will return the connectivity matrix with ones and zeros, and 'distance' will return the distances between neighbors according to the given metric." - }, - { - "name": "radius", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Radius of neighborhood in the transformed sparse graph." - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. 
If metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays as input and return one value indicating the distance between them. This works for Scipy's metrics, but is less efficient than passing the metric name as a string. Distance matrices are not supported. Valid values for metric are: - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'] - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'] See the documentation for scipy.spatial.distance for details on these metrics." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Parameter for the Minkowski metric from sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. If ``-1``, then the number of jobs is set to the number of CPU cores." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the radius neighbors transformer from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\nReturns\n-------\nself : RadiusNeighborsTransformer\n The fitted radius neighbors transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample data" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Computes the (weighted) graph of Neighbors for points in X\n\nParameters\n----------\nX : array-like of shape (n_samples_transform, n_features)\n Sample data\n\nReturns\n-------\nXt : sparse matrix of shape (n_samples_transform, n_samples_fit)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training set." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit to data, then transform it.\n\nFits transformer to X and y with optional parameters fit_params\nand returns a transformed version of X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training set.\n\ny : ignored\n\nReturns\n-------\nXt : sparse matrix of shape (n_samples, n_samples)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transform X into a (weighted) graph of neighbors nearer than a radius\n\nThe transformed data is a sparse graph as returned by\nradius_neighbors_graph.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nmode : {'distance', 'connectivity'}, default='distance'\n Type of returned matrix: 'connectivity' will return the connectivity\n matrix with ones and zeros, and 'distance' will return the distances\n between neighbors according to the given metric.\n\nradius : float, default=1.\n Radius of neighborhood in the transformed sparse graph.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. 
This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nmetric : str or callable, default='minkowski'\n metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\np : int, default=2\n Parameter for the Minkowski metric from\n sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=1\n The number of parallel jobs to run for neighbors search.\n If ``-1``, then the number of jobs is set to the number of CPU cores.\n\nAttributes\n----------\neffective_metric_ : str or callable\n The distance metric used. It will be same as the `metric` parameter\n or a synonym of it, e.g. 
'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\nExamples\n--------\n>>> from sklearn.cluster import DBSCAN\n>>> from sklearn.neighbors import RadiusNeighborsTransformer\n>>> from sklearn.pipeline import make_pipeline\n>>> estimator = make_pipeline(\n... RadiusNeighborsTransformer(radius=42.0, mode='distance'),\n... DBSCAN(min_samples=30, metric='precomputed'))" - } - ], - "functions": [ - { - "name": "_check_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the validity of the input parameters" - }, - { - "name": "_query_include_self", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the query based on include_self param" - }, - { - "name": "kneighbors_graph", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample data, in the form of a numpy array or a precomputed :class:`BallTree`." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors for each sample." - }, - { - "name": "mode", - "type": "Literal['connectivity', 'distance']", - "hasDefault": true, - "default": "'connectivity'", - "limitation": null, - "ignored": false, - "docstring": "Type of returned matrix: 'connectivity' will return the connectivity matrix with ones and zeros, and 'distance' will return the distances between neighbors according to the given metric." 
- }, - { - "name": "metric", - "type": "str", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "The distance metric used to calculate the k-Neighbors for each sample point. The DistanceMetric class gives a list of available metrics. The default distance is 'euclidean' ('minkowski' metric with the p param equal to 2.)" - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "additional keyword arguments for the metric function." - }, - { - "name": "include_self", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to mark each sample as the first nearest neighbor to itself. If 'auto', then True is used for mode='connectivity' and False for mode='distance'." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Computes the (weighted) graph of k-Neighbors for points in X\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or BallTree\n Sample data, in the form of a numpy array or a precomputed\n :class:`BallTree`.\n\nn_neighbors : int\n Number of neighbors for each sample.\n\nmode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the connectivity\n matrix with ones and zeros, and 'distance' will return the distances\n between neighbors according to the given metric.\n\nmetric : str, default='minkowski'\n The distance metric used to calculate the k-Neighbors for each sample\n point. The DistanceMetric class gives a list of available metrics.\n The default distance is 'euclidean' ('minkowski' metric with the p\n param equal to 2.)\n\np : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n additional keyword arguments for the metric function.\n\ninclude_self : bool or 'auto', default=False\n Whether or not to mark each sample as the first nearest neighbor to\n itself. If 'auto', then True is used for mode='connectivity' and False\n for mode='distance'.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nReturns\n-------\nA : sparse matrix of shape (n_samples, n_samples)\n Graph where A[i, j] is assigned the weight of edge that\n connects i to j. 
The matrix is of CSR format.\n\nExamples\n--------\n>>> X = [[0], [3], [1]]\n>>> from sklearn.neighbors import kneighbors_graph\n>>> A = kneighbors_graph(X, 2, mode='connectivity', include_self=True)\n>>> A.toarray()\narray([[1., 0., 1.],\n [0., 1., 1.],\n [1., 0., 1.]])\n\nSee Also\n--------\nradius_neighbors_graph" - }, - { - "name": "radius_neighbors_graph", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample data, in the form of a numpy array or a precomputed :class:`BallTree`." - }, - { - "name": "radius", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Radius of neighborhoods." - }, - { - "name": "mode", - "type": "Literal['connectivity', 'distance']", - "hasDefault": true, - "default": "'connectivity'", - "limitation": null, - "ignored": false, - "docstring": "Type of returned matrix: 'connectivity' will return the connectivity matrix with ones and zeros, and 'distance' will return the distances between neighbors according to the given metric." - }, - { - "name": "metric", - "type": "str", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "The distance metric used to calculate the neighbors within a given radius for each sample point. The DistanceMetric class gives a list of available metrics. The default distance is 'euclidean' ('minkowski' metric with the param equal to 2.)" - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." 
- }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "additional keyword arguments for the metric function." - }, - { - "name": "include_self", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to mark each sample as the first nearest neighbor to itself. If 'auto', then True is used for mode='connectivity' and False for mode='distance'." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Computes the (weighted) graph of Neighbors for points in X\n\nNeighborhoods are restricted the points at a distance lower than\nradius.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or BallTree\n Sample data, in the form of a numpy array or a precomputed\n :class:`BallTree`.\n\nradius : float\n Radius of neighborhoods.\n\nmode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the connectivity\n matrix with ones and zeros, and 'distance' will return the distances\n between neighbors according to the given metric.\n\nmetric : str, default='minkowski'\n The distance metric used to calculate the neighbors within a\n given radius for each sample point. The DistanceMetric class\n gives a list of available metrics. The default distance is\n 'euclidean' ('minkowski' metric with the param equal to 2.)\n\np : int, default=2\n Power parameter for the Minkowski metric. 
When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n additional keyword arguments for the metric function.\n\ninclude_self : bool or 'auto', default=False\n Whether or not to mark each sample as the first nearest neighbor to\n itself. If 'auto', then True is used for mode='connectivity' and False\n for mode='distance'.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nReturns\n-------\nA : sparse matrix of shape (n_samples, n_samples)\n Graph where A[i, j] is assigned the weight of edge that connects\n i to j. The matrix is of CSR format.\n\nExamples\n--------\n>>> X = [[0], [3], [1]]\n>>> from sklearn.neighbors import radius_neighbors_graph\n>>> A = radius_neighbors_graph(X, 1.5, mode='connectivity',\n... 
include_self=True)\n>>> A.toarray()\narray([[1., 0., 1.],\n [0., 1., 0.],\n [1., 0., 1.]])\n\nSee Also\n--------\nkneighbors_graph" - } - ] - }, - { - "name": "sklearn.neighbors._kde", - "imports": [ - "import numpy as np", - "from scipy.special import gammainc", - "from base import BaseEstimator", - "from utils import check_array", - "from utils import check_random_state", - "from utils.validation import _check_sample_weight", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.extmath import row_norms", - "from _ball_tree import BallTree", - "from _ball_tree import DTYPE", - "from _kd_tree import KDTree" - ], - "classes": [ - { - "name": "KernelDensity", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "bandwidth", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The bandwidth of the kernel." - }, - { - "name": "algorithm", - "type": "Literal['kd_tree', 'ball_tree', 'auto']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The tree algorithm to use." - }, - { - "name": "kernel", - "type": "Literal['gaussian', 'tophat', 'epanechnikov', 'exponential', 'linear', 'cosine']", - "hasDefault": true, - "default": "'gaussian'", - "limitation": null, - "ignored": false, - "docstring": "The kernel to use." - }, - { - "name": "metric", - "type": "str", - "hasDefault": true, - "default": "'euclidian'", - "limitation": null, - "ignored": false, - "docstring": "The distance metric to use. Note that not all metrics are valid with all algorithms. Refer to the documentation of :class:`BallTree` and :class:`KDTree` for a description of available algorithms. Note that the normalization of the density output is correct only for the Euclidean distance metric. Default is 'euclidean'." 
- }, - { - "name": "atol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The desired absolute tolerance of the result. A larger tolerance will generally lead to faster execution." - }, - { - "name": "rtol", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The desired relative tolerance of the result. A larger tolerance will generally lead to faster execution." - }, - { - "name": "breadth_first", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If true (default), use a breadth-first approach to the problem. Otherwise use a depth-first approach." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "40", - "limitation": null, - "ignored": false, - "docstring": "Specify the leaf size of the underlying tree. See :class:`BallTree` or :class:`KDTree` for details." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional parameters to be passed to the tree for use with the metric. For more information, see the documentation of :class:`BallTree` or :class:`KDTree`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_choose_algorithm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of n_features-dimensional data points. Each row corresponds to a single data point." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored. 
This parameter exists only for compatibility with :class:`~sklearn.pipeline.Pipeline`." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of sample weights attached to the data X. .. versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the Kernel Density model on the data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n List of sample weights attached to the data X.\n\n .. versionadded:: 0.20\n\nReturns\n-------\nself : object\n Returns instance of object." - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An array of points to query. Last dimension should match dimension of training data (n_features)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluate the log density model on the data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n An array of points to query. Last dimension should match dimension\n of training data (n_features).\n\nReturns\n-------\ndensity : ndarray of shape (n_samples,)\n The array of log(density) evaluations. These are normalized to be\n probability densities, so values will be low for high-dimensional\n data." - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of n_features-dimensional data points. 
Each row corresponds to a single data point." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored. This parameter exists only for compatibility with :class:`~sklearn.pipeline.Pipeline`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the total log probability density under the model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nlogprob : float\n Total log-likelihood of the data in X. This is normalized to be a\n probability density, so the value will be low for high-dimensional\n data." - }, - { - "name": "sample", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to generate." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation used to generate random samples. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate random samples from the model.\n\nCurrently, this is implemented only for gaussian and tophat kernels.\n\nParameters\n----------\nn_samples : int, default=1\n Number of samples to generate.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation used to generate\n random samples. 
Pass an int for reproducible results\n across multiple function calls.\n See :term: `Glossary `.\n\nReturns\n-------\nX : array-like of shape (n_samples, n_features)\n List of samples." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Kernel Density Estimation.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nbandwidth : float, default=1.0\n The bandwidth of the kernel.\n\nalgorithm : {'kd_tree', 'ball_tree', 'auto'}, default='auto'\n The tree algorithm to use.\n\nkernel : {'gaussian', 'tophat', 'epanechnikov', 'exponential', 'linear', 'cosine'}, default='gaussian'\n The kernel to use.\n\nmetric : str, default='euclidian'\n The distance metric to use. Note that not all metrics are\n valid with all algorithms. Refer to the documentation of\n :class:`BallTree` and :class:`KDTree` for a description of\n available algorithms. Note that the normalization of the density\n output is correct only for the Euclidean distance metric. Default\n is 'euclidean'.\n\natol : float, default=0\n The desired absolute tolerance of the result. A larger tolerance will\n generally lead to faster execution.\n\nrtol : float, default=0\n The desired relative tolerance of the result. A larger tolerance will\n generally lead to faster execution.\n\nbreadth_first : bool, default=True\n If true (default), use a breadth-first approach to the problem.\n Otherwise use a depth-first approach.\n\nleaf_size : int, default=40\n Specify the leaf size of the underlying tree. See :class:`BallTree`\n or :class:`KDTree` for details.\n\nmetric_params : dict, default=None\n Additional parameters to be passed to the tree for use with the\n metric. 
For more information, see the documentation of\n :class:`BallTree` or :class:`KDTree`.\n\nAttributes\n----------\ntree_ : ``BinaryTree`` instance\n The tree algorithm for fast generalized N-point problems.\n\nSee Also\n--------\nsklearn.neighbors.KDTree : K-dimensional tree for fast generalized N-point\n problems.\nsklearn.neighbors.BallTree : Ball tree for fast generalized N-point\n problems.\n\nExamples\n--------\nCompute a gaussian kernel density estimate with a fixed bandwidth.\n\n>>> import numpy as np\n>>> rng = np.random.RandomState(42)\n>>> X = rng.random_sample((100, 3))\n>>> kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X)\n>>> log_density = kde.score_samples(X[:3])\n>>> log_density\narray([-1.52955942, -1.51462041, -1.60244657])" - } - ], - "functions": [] - }, - { - "name": "sklearn.neighbors._lof", - "imports": [ - "import numpy as np", - "import warnings", - "from _base import NeighborsBase", - "from _base import KNeighborsMixin", - "from base import OutlierMixin", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils import check_array" - ], - "classes": [ - { - "name": "LocalOutlierFactor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "20", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to use by default for :meth:`kneighbors` queries. If n_neighbors is larger than the number of samples provided, all samples will be used." - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. 
- 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "metric used for the distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square. X may be a sparse matrix, in which case only \"nonzero\" elements may be considered neighbors. If metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays as input and return one value indicating the distance between them. This works for Scipy's metrics, but is less efficient than passing the metric name as a string. 
Valid values for metric are: - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'] - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'] See the documentation for scipy.spatial.distance for details on these metrics: https://docs.scipy.org/doc/scipy/reference/spatial.distance.html" - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Parameter for the Minkowski metric from :func:`sklearn.metrics.pairwise.pairwise_distances`. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "contamination", - "type": "Union[Literal['auto'], float]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "The amount of contamination of the data set, i.e. the proportion of outliers in the data set. When fitting this is used to define the threshold on the scores of the samples. - if 'auto', the threshold is determined as in the original paper, - if a float, the contamination should be in the range [0, 0.5]. .. versionchanged:: 0.22 The default value of ``contamination`` changed from 0.1 to ``'auto'``." - }, - { - "name": "novelty", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "By default, LocalOutlierFactor is only meant to be used for outlier detection (novelty=False). 
Set novelty to True if you want to use LocalOutlierFactor for novelty detection. In this case be aware that that you should only use predict, decision_function and score_samples on new unseen data and not on the training set. .. versionadded:: 0.20" - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query sample or samples to compute the Local Outlier Factor w.r.t. to the training samples." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fits the model to the training set X and returns the labels.\n\n**Not available for novelty detection (when novelty is set to True).**\nLabel is 1 for an inlier and -1 for an outlier according to the LOF\nscore and the contamination parameter.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features), default=None\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. to the training samples.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and 1 for inliers." 
- }, - { - "name": "_fit_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query sample or samples to compute the Local Outlier Factor w.r.t. to the training samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fits the model to the training set X and returns the labels.\n\nLabel is 1 for an inlier and -1 for an outlier according to the LOF\nscore and the contamination parameter.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features), default=None\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. to the training samples.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and 1 for inliers." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the local outlier factor detector from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : LocalOutlierFactor\n The fitted local outlier factor detector." 
- }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query sample or samples to compute the Local Outlier Factor w.r.t. to the training samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n**Only available for novelty detection (when novelty is set to True).**\nThis method allows to generalize prediction to *new observations* (not\nin the training set).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. to the training samples.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and +1 for inliers." - }, - { - "name": "_predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query sample or samples to compute the Local Outlier Factor w.r.t. to the training samples. If None, makes prediction on the training data without considering them as their own neighbors." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\nIf X is None, returns the same as fit_predict(X_train).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features), default=None\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. to the training samples. If None, makes prediction on the\n training data without considering them as their own neighbors.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and +1 for inliers." 
- }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query sample or samples to compute the Local Outlier Factor w.r.t. the training samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Shifted opposite of the Local Outlier Factor of X.\n\nBigger is better, i.e. large values correspond to inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe shift offset allows a zero threshold for being an outlier.\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the later in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\nReturns\n-------\nshifted_opposite_lof_scores : ndarray of shape (n_samples,)\n The shifted opposite of the Local Outlier Factor of each input\n samples. The lower, the more abnormal. Negative scores represent\n outliers, positive scores represent inliers." - }, - { - "name": "_decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query sample or samples to compute the Local Outlier Factor w.r.t. the training samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Shifted opposite of the Local Outlier Factor of X.\n\nBigger is better, i.e. 
large values correspond to inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe shift offset allows a zero threshold for being an outlier.\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the later in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\nReturns\n-------\nshifted_opposite_lof_scores : ndarray of shape (n_samples,)\n The shifted opposite of the Local Outlier Factor of each input\n samples. The lower, the more abnormal. Negative scores represent\n outliers, positive scores represent inliers." - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query sample or samples to compute the Local Outlier Factor w.r.t. the training samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Opposite of the Local Outlier Factor of X.\n\nIt is the opposite as bigger is better, i.e. large values correspond\nto inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the later in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.\nThe score_samples on training data is available by considering the\nthe ``negative_outlier_factor_`` attribute.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. 
the training samples.\n\nReturns\n-------\nopposite_lof_scores : ndarray of shape (n_samples,)\n The opposite of the Local Outlier Factor of each input samples.\n The lower, the more abnormal." - }, - { - "name": "_score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The query sample or samples to compute the Local Outlier Factor w.r.t. the training samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Opposite of the Local Outlier Factor of X.\n\nIt is the opposite as bigger is better, i.e. large values correspond\nto inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the later in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.\nThe score_samples on training data is available by considering the\nthe ``negative_outlier_factor_`` attribute.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\nReturns\n-------\nopposite_lof_scores : ndarray of shape (n_samples,)\n The opposite of the Local Outlier Factor of each input samples.\n The lower, the more abnormal." - }, - { - "name": "_local_reachability_density", - "decorators": [], - "parameters": [ - { - "name": "distances_X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Distances to the neighbors (in the training samples `self._fit_X`) of each query point to compute the LRD." 
- }, - { - "name": "neighbors_indices", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Neighbors indices (of each query point) among training samples self._fit_X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The local reachability density (LRD)\n\nThe LRD of a sample is the inverse of the average reachability\ndistance of its k-nearest neighbors.\n\nParameters\n----------\ndistances_X : ndarray of shape (n_queries, self.n_neighbors)\n Distances to the neighbors (in the training samples `self._fit_X`)\n of each query point to compute the LRD.\n\nneighbors_indices : ndarray of shape (n_queries, self.n_neighbors)\n Neighbors indices (of each query point) among training samples\n self._fit_X.\n\nReturns\n-------\nlocal_reachability_density : ndarray of shape (n_queries,)\n The local reachability density of each sample." - } - ], - "docstring": "Unsupervised Outlier Detection using Local Outlier Factor (LOF)\n\nThe anomaly score of each sample is called Local Outlier Factor.\nIt measures the local deviation of density of a given sample with\nrespect to its neighbors.\nIt is local in that the anomaly score depends on how isolated the object\nis with respect to the surrounding neighborhood.\nMore precisely, locality is given by k-nearest neighbors, whose distance\nis used to estimate the local density.\nBy comparing the local density of a sample to the local densities of\nits neighbors, one can identify samples that have a substantially lower\ndensity than their neighbors. These are considered outliers.\n\n.. 
versionadded:: 0.19\n\nParameters\n----------\nn_neighbors : int, default=20\n Number of neighbors to use by default for :meth:`kneighbors` queries.\n If n_neighbors is larger than the number of samples provided,\n all samples will be used.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n affect the speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nmetric : str or callable, default='minkowski'\n metric used for the distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square. X may be a sparse matrix, in which case only \"nonzero\"\n elements may be considered neighbors.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. 
This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics:\n https://docs.scipy.org/doc/scipy/reference/spatial.distance.html\n\np : int, default=2\n Parameter for the Minkowski metric from\n :func:`sklearn.metrics.pairwise.pairwise_distances`. When p = 1, this\n is equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\ncontamination : 'auto' or float, default='auto'\n The amount of contamination of the data set, i.e. the proportion\n of outliers in the data set. When fitting this is used to define the\n threshold on the scores of the samples.\n\n - if 'auto', the threshold is determined as in the\n original paper,\n - if a float, the contamination should be in the range [0, 0.5].\n\n .. versionchanged:: 0.22\n The default value of ``contamination`` changed from 0.1\n to ``'auto'``.\n\nnovelty : bool, default=False\n By default, LocalOutlierFactor is only meant to be used for outlier\n detection (novelty=False). Set novelty to True if you want to use\n LocalOutlierFactor for novelty detection. In this case be aware that\n that you should only use predict, decision_function and score_samples\n on new unseen data and not on the training set.\n\n .. 
versionadded:: 0.20\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nnegative_outlier_factor_ : ndarray of shape (n_samples,)\n The opposite LOF of the training samples. The higher, the more normal.\n Inliers tend to have a LOF score close to 1\n (``negative_outlier_factor_`` close to -1), while outliers tend to have\n a larger LOF score.\n\n The local outlier factor (LOF) of a sample captures its\n supposed 'degree of abnormality'.\n It is the average of the ratio of the local reachability density of\n a sample and those of its k-nearest neighbors.\n\nn_neighbors_ : int\n The actual number of neighbors used for :meth:`kneighbors` queries.\n\noffset_ : float\n Offset used to obtain binary labels from the raw scores.\n Observations having a negative_outlier_factor smaller than `offset_`\n are detected as abnormal.\n The offset is set to -1.5 (inliers score around -1), except when a\n contamination parameter different than \"auto\" is provided. In that\n case, the offset is defined in such a way we obtain the expected\n number of outliers in training.\n\n .. versionadded:: 0.20\n\neffective_metric_ : str\n The effective metric used for the distance computation.\n\neffective_metric_params_ : dict\n The effective additional keyword arguments for the metric function.\n\nn_samples_fit_ : int\n It is the number of samples in the fitted data.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.neighbors import LocalOutlierFactor\n>>> X = [[-1.1], [0.2], [101.1], [0.3]]\n>>> clf = LocalOutlierFactor(n_neighbors=2)\n>>> clf.fit_predict(X)\narray([ 1, 1, -1, 1])\n>>> clf.negative_outlier_factor_\narray([ -0.9821..., -1.0370..., -73.3697..., -0.9821...])\n\nReferences\n----------\n.. [1] Breunig, M. M., Kriegel, H. P., Ng, R. T., & Sander, J. 
(2000, May).\n LOF: identifying density-based local outliers. In ACM sigmod record." - } - ], - "functions": [] - }, - { - "name": "sklearn.neighbors._nca", - "imports": [ - "from __future__ import print_function", - "from warnings import warn", - "import numpy as np", - "import sys", - "import time", - "import numbers", - "from scipy.optimize import minimize", - "from utils.extmath import softmax", - "from metrics import pairwise_distances", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from preprocessing import LabelEncoder", - "from decomposition import PCA", - "from utils.multiclass import check_classification_targets", - "from utils.random import check_random_state", - "from utils.validation import check_is_fitted", - "from utils.validation import check_array", - "from utils.validation import check_scalar", - "from utils.validation import _deprecate_positional_args", - "from exceptions import ConvergenceWarning", - "from discriminant_analysis import LinearDiscriminantAnalysis" - ], - "classes": [ - { - "name": "NeighborhoodComponentsAnalysis", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Preferred dimensionality of the projected space. If None it will be set to ``n_features``." - }, - { - "name": "init", - "type": "Literal['auto', 'pca', 'lda', 'identity', 'random']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Initialization of the linear transformation. Possible options are 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape (n_features_a, n_features_b). 'auto' Depending on ``n_components``, the most reasonable initialization will be chosen. If ``n_components <= n_classes`` we use 'lda', as it uses labels information. 
If not, but ``n_components < min(n_features, n_samples)``, we use 'pca', as it projects data in meaningful directions (those of higher variance). Otherwise, we just use 'identity'. 'pca' ``n_components`` principal components of the inputs passed to :meth:`fit` will be used to initialize the transformation. (See :class:`~sklearn.decomposition.PCA`) 'lda' ``min(n_components, n_classes)`` most discriminative components of the inputs passed to :meth:`fit` will be used to initialize the transformation. (If ``n_components > n_classes``, the rest of the components will be zero.) (See :class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) 'identity' If ``n_components`` is strictly smaller than the dimensionality of the inputs passed to :meth:`fit`, the identity matrix will be truncated to the first ``n_components`` rows. 'random' The initial transformation will be a random array of shape `(n_components, n_features)`. Each value is sampled from the standard normal distribution. numpy array n_features_b must match the dimensionality of the inputs passed to :meth:`fit` and n_features_a must be less than or equal to that. If ``n_components`` is not None, n_features_a must match it." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True and :meth:`fit` has been called before, the solution of the previous call to :meth:`fit` is used as the initial linear transformation (``n_components`` and ``init`` will be ignored)." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "50", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations in the optimization." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-5", - "limitation": null, - "ignored": false, - "docstring": "Convergence tolerance for the optimization." 
- }, - { - "name": "callback", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, this function is called after every iteration of the optimizer, taking as arguments the current solution (flattened transformation matrix) and the number of iterations. This might be useful in case one wants to examine or store the transformation found after each iteration." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "If 0, no progress messages will be printed. If 1, progress messages will be printed to stdout. If > 1, progress messages will be printed and the ``disp`` parameter of :func:`scipy.optimize.minimize` will be set to ``verbose - 2``." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A pseudo random number generator object or a seed for it if int. If ``init='random'``, ``random_state`` is used to initialize the random transformation. If ``init='pca'``, ``random_state`` is passed as an argument to PCA when initializing the transformation. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The corresponding training labels." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training samples.\n\ny : array-like of shape (n_samples,)\n The corresponding training labels.\n\nReturns\n-------\nself : object\n returns a trained NeighborhoodComponentsAnalysis model." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data samples." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Applies the learned transformation to the given data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data samples.\n\nReturns\n-------\nX_embedded: ndarray of shape (n_samples, n_components)\n The data samples transformed.\n\nRaises\n------\nNotFittedError\n If :meth:`fit` has not been called before." - }, - { - "name": "_validate_params", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The corresponding training labels." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate parameters as soon as :meth:`fit` is called.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training samples.\n\ny : array-like of shape (n_samples,)\n The corresponding training labels.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The validated training samples.\n\ny : ndarray of shape (n_samples,)\n The validated training labels, encoded to be integers in\n the range(0, n_classes).\n\ninit : str or ndarray of shape (n_features_a, n_features_b)\n The validated initialization of the linear transformation.\n\nRaises\n-------\nTypeError\n If a parameter is not an instance of the desired type.\n\nValueError\n If a parameter's value violates its legal value range or if the\n combination of two or more given parameters is incompatible." - }, - { - "name": "_initialize", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training samples." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training labels." - }, - { - "name": "init", - "type": "Union[NDArray, str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The validated initialization of the linear transformation." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialize the transformation.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training samples.\n\ny : array-like of shape (n_samples,)\n The training labels.\n\ninit : str or ndarray of shape (n_features_a, n_features_b)\n The validated initialization of the linear transformation.\n\nReturns\n-------\ntransformation : ndarray of shape (n_components, n_features)\n The initialized linear transformation." - }, - { - "name": "_callback", - "decorators": [], - "parameters": [ - { - "name": "transformation", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The solution computed by the optimizer in this iteration." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Called after each iteration of the optimizer.\n\nParameters\n----------\ntransformation : ndarray of shape (n_components * n_features,)\n The solution computed by the optimizer in this iteration." - }, - { - "name": "_loss_grad_lbfgs", - "decorators": [], - "parameters": [ - { - "name": "transformation", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The raveled linear transformation on which to compute loss and evaluate gradient." - }, - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training samples." - }, - { - "name": "same_class_mask", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A mask where ``mask[i, j] == 1`` if ``X[i]`` and ``X[j]`` belong to the same class, and ``0`` otherwise." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the loss and the loss gradient w.r.t. 
``transformation``.\n\nParameters\n----------\ntransformation : ndarray of shape (n_components * n_features,)\n The raveled linear transformation on which to compute loss and\n evaluate gradient.\n\nX : ndarray of shape (n_samples, n_features)\n The training samples.\n\nsame_class_mask : ndarray of shape (n_samples, n_samples)\n A mask where ``mask[i, j] == 1`` if ``X[i]`` and ``X[j]`` belong\n to the same class, and ``0`` otherwise.\n\nReturns\n-------\nloss : float\n The loss computed for the given transformation.\n\ngradient : ndarray of shape (n_components * n_features,)\n The new (flattened) gradient of the loss." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Neighborhood Components Analysis\n\nNeighborhood Component Analysis (NCA) is a machine learning algorithm for\nmetric learning. It learns a linear transformation in a supervised fashion\nto improve the classification accuracy of a stochastic nearest neighbors\nrule in the transformed space.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Preferred dimensionality of the projected space.\n If None it will be set to ``n_features``.\n\ninit : {'auto', 'pca', 'lda', 'identity', 'random'} or ndarray of shape (n_features_a, n_features_b), default='auto'\n Initialization of the linear transformation. Possible options are\n 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape\n (n_features_a, n_features_b).\n\n 'auto'\n Depending on ``n_components``, the most reasonable initialization\n will be chosen. If ``n_components <= n_classes`` we use 'lda', as\n it uses labels information. If not, but\n ``n_components < min(n_features, n_samples)``, we use 'pca', as\n it projects data in meaningful directions (those of higher\n variance). 
Otherwise, we just use 'identity'.\n\n 'pca'\n ``n_components`` principal components of the inputs passed\n to :meth:`fit` will be used to initialize the transformation.\n (See :class:`~sklearn.decomposition.PCA`)\n\n 'lda'\n ``min(n_components, n_classes)`` most discriminative\n components of the inputs passed to :meth:`fit` will be used to\n initialize the transformation. (If ``n_components > n_classes``,\n the rest of the components will be zero.) (See\n :class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)\n\n 'identity'\n If ``n_components`` is strictly smaller than the\n dimensionality of the inputs passed to :meth:`fit`, the identity\n matrix will be truncated to the first ``n_components`` rows.\n\n 'random'\n The initial transformation will be a random array of shape\n `(n_components, n_features)`. Each value is sampled from the\n standard normal distribution.\n\n numpy array\n n_features_b must match the dimensionality of the inputs passed to\n :meth:`fit` and n_features_a must be less than or equal to that.\n If ``n_components`` is not None, n_features_a must match it.\n\nwarm_start : bool, default=False\n If True and :meth:`fit` has been called before, the solution of the\n previous call to :meth:`fit` is used as the initial linear\n transformation (``n_components`` and ``init`` will be ignored).\n\nmax_iter : int, default=50\n Maximum number of iterations in the optimization.\n\ntol : float, default=1e-5\n Convergence tolerance for the optimization.\n\ncallback : callable, default=None\n If not None, this function is called after every iteration of the\n optimizer, taking as arguments the current solution (flattened\n transformation matrix) and the number of iterations. 
This might be\n useful in case one wants to examine or store the transformation\n found after each iteration.\n\nverbose : int, default=0\n If 0, no progress messages will be printed.\n If 1, progress messages will be printed to stdout.\n If > 1, progress messages will be printed and the ``disp``\n parameter of :func:`scipy.optimize.minimize` will be set to\n ``verbose - 2``.\n\nrandom_state : int or numpy.RandomState, default=None\n A pseudo random number generator object or a seed for it if int. If\n ``init='random'``, ``random_state`` is used to initialize the random\n transformation. If ``init='pca'``, ``random_state`` is passed as an\n argument to PCA when initializing the transformation. Pass an int\n for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n The linear transformation learned during fitting.\n\nn_iter_ : int\n Counts the number of iterations performed by the optimizer.\n\nrandom_state_ : numpy.RandomState\n Pseudo random number generator object used during initialization.\n\nExamples\n--------\n>>> from sklearn.neighbors import NeighborhoodComponentsAnalysis\n>>> from sklearn.neighbors import KNeighborsClassifier\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = load_iris(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n... stratify=y, test_size=0.7, random_state=42)\n>>> nca = NeighborhoodComponentsAnalysis(random_state=42)\n>>> nca.fit(X_train, y_train)\nNeighborhoodComponentsAnalysis(...)\n>>> knn = KNeighborsClassifier(n_neighbors=3)\n>>> knn.fit(X_train, y_train)\nKNeighborsClassifier(...)\n>>> print(knn.score(X_test, y_test))\n0.933333...\n>>> knn.fit(nca.transform(X_train), y_train)\nKNeighborsClassifier(...)\n>>> print(knn.score(nca.transform(X_test), y_test))\n0.961904...\n\nReferences\n----------\n.. [1] J. Goldberger, G. 
Hinton, S. Roweis, R. Salakhutdinov.\n \"Neighbourhood Components Analysis\". Advances in Neural Information\n Processing Systems. 17, 513-520, 2005.\n http://www.cs.nyu.edu/~roweis/papers/ncanips.pdf\n\n.. [2] Wikipedia entry on Neighborhood Components Analysis\n https://en.wikipedia.org/wiki/Neighbourhood_components_analysis" - } - ], - "functions": [] - }, - { - "name": "sklearn.neighbors._nearest_centroid", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy import sparse as sp", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from metrics.pairwise import pairwise_distances", - "from preprocessing import LabelEncoder", - "from utils.validation import check_array", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.sparsefuncs import csc_median_axis_0", - "from utils.multiclass import check_classification_targets" - ], - "classes": [ - { - "name": "NearestCentroid", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string or callable, it must be one of the options allowed by metrics.pairwise.pairwise_distances for its metric parameter. The centroids for the samples corresponding to each class is the point from which the sum of the distances (according to the metric) of all samples that belong to that particular class are minimized. If the \"manhattan\" metric is provided, this centroid is the median and for all other metrics, the centroid is now set to be the mean. .. 
versionchanged:: 0.19 ``metric='precomputed'`` was deprecated and now raises an error" - }, - { - "name": "shrink_threshold", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for shrinking centroids to remove features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features. Note that centroid shrinking cannot be used with sparse matrices." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values (integers)" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the NearestCentroid model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n Note that centroid shrinking cannot be used with sparse matrices.\ny : array-like of shape (n_samples,)\n Target values (integers)" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on an array of test vectors X.\n\nThe predicted class C for each sample in X is returned.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n\nNotes\n-----\nIf the metric constructor parameter is 
\"precomputed\", X is assumed to\nbe the distance matrix between the data to be predicted and\n``self.centroids_``." - } - ], - "docstring": "Nearest centroid classifier.\n\nEach class is represented by its centroid, with test samples classified to\nthe class with the nearest centroid.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nmetric : str or callable\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by metrics.pairwise.pairwise_distances for its\n metric parameter.\n The centroids for the samples corresponding to each class is the point\n from which the sum of the distances (according to the metric) of all\n samples that belong to that particular class are minimized.\n If the \"manhattan\" metric is provided, this centroid is the median and\n for all other metrics, the centroid is now set to be the mean.\n\n .. versionchanged:: 0.19\n ``metric='precomputed'`` was deprecated and now raises an error\n\nshrink_threshold : float, default=None\n Threshold for shrinking centroids to remove features.\n\nAttributes\n----------\ncentroids_ : array-like of shape (n_classes, n_features)\n Centroid of each class.\n\nclasses_ : array of shape (n_classes,)\n The unique classes labels.\n\nExamples\n--------\n>>> from sklearn.neighbors import NearestCentroid\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = NearestCentroid()\n>>> clf.fit(X, y)\nNearestCentroid()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n\nSee Also\n--------\nKNeighborsClassifier : Nearest neighbors classifier.\n\nNotes\n-----\nWhen used for text classification with tf-idf vectors, this classifier is\nalso known as the Rocchio classifier.\n\nReferences\n----------\nTibshirani, R., Hastie, T., Narasimhan, B., & Chu, G. (2002). 
Diagnosis of\nmultiple cancer types by shrunken centroids of gene expression. Proceedings\nof the National Academy of Sciences of the United States of America,\n99(10), 6567-6572. The National Academy of Sciences." - } - ], - "functions": [] - }, - { - "name": "sklearn.neighbors._regression", - "imports": [ - "import warnings", - "import numpy as np", - "from _base import _get_weights", - "from _base import _check_weights", - "from _base import NeighborsBase", - "from _base import KNeighborsMixin", - "from _base import RadiusNeighborsMixin", - "from base import RegressorMixin", - "from utils import check_array", - "from utils.validation import _deprecate_positional_args", - "from utils.deprecation import deprecated" - ], - "classes": [ - { - "name": "KNeighborsRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to use by default for :meth:`kneighbors` queries." - }, - { - "name": "weights", - "type": "Literal['uniform', 'distance']", - "hasDefault": true, - "default": "'uniform'", - "limitation": null, - "ignored": false, - "docstring": "weight function used in prediction. Possible values: - 'uniform' : uniform weights. All points in each neighborhood are weighted equally. - 'distance' : weight points by the inverse of their distance. in this case, closer neighbors of a query point will have a greater influence than neighbors which are further away. - [callable] : a user-defined function which accepts an array of distances, and returns an array of the same shape containing the weights. Uniform weights are used by default." 
- }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "the distance metric to use for the tree. The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean metric. See the documentation of :class:`DistanceMetric` for a list of available metrics. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square during fit. X may be a :term:`sparse graph`, in which case only \"nonzero\" elements may be considered neighbors." 
- }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. Doesn't affect :meth:`fit` method." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the k-nearest neighbors regressor from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nReturns\n-------\nself : KNeighborsRegressor\n The fitted k-nearest neighbors regressor." 
- }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the target for the provided data\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=int\n Target values." - } - ], - "docstring": "Regression based on k-nearest neighbors.\n\nThe target is predicted by local interpolation of the targets\nassociated of the nearest neighbors in the training set.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.9\n\nParameters\n----------\nn_neighbors : int, default=5\n Number of neighbors to use by default for :meth:`kneighbors` queries.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. 
All points in each neighborhood\n are weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\n Uniform weights are used by default.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\np : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. 
X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n Doesn't affect :meth:`fit` method.\n\nAttributes\n----------\neffective_metric_ : str or callable\n The distance metric to use. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import KNeighborsRegressor\n>>> neigh = KNeighborsRegressor(n_neighbors=2)\n>>> neigh.fit(X, y)\nKNeighborsRegressor(...)\n>>> print(neigh.predict([[1.5]]))\n[0.5]\n\nSee Also\n--------\nNearestNeighbors\nRadiusNeighborsRegressor\nKNeighborsClassifier\nRadiusNeighborsClassifier\n\nNotes\n-----\nSee :ref:`Nearest Neighbors ` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n.. 
warning::\n\n Regarding the Nearest Neighbors algorithms, if it is found that two\n neighbors, neighbor `k+1` and `k`, have identical distances but\n different labels, the results will depend on the ordering of the\n training data.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm" - }, - { - "name": "RadiusNeighborsRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "radius", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Range of parameter space to use by default for :meth:`radius_neighbors` queries." - }, - { - "name": "weights", - "type": "Literal['uniform', 'distance']", - "hasDefault": true, - "default": "'uniform'", - "limitation": null, - "ignored": false, - "docstring": "weight function used in prediction. Possible values: - 'uniform' : uniform weights. All points in each neighborhood are weighted equally. - 'distance' : weight points by the inverse of their distance. in this case, closer neighbors of a query point will have a greater influence than neighbors which are further away. - [callable] : a user-defined function which accepts an array of distances, and returns an array of the same shape containing the weights. Uniform weights are used by default." - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force." 
- }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "the distance metric to use for the tree. The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean metric. See the documentation of :class:`DistanceMetric` for a list of available metrics. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square during fit. X may be a :term:`sparse graph`, in which case only \"nonzero\" elements may be considered neighbors." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the radius neighbors regressor from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nReturns\n-------\nself : RadiusNeighborsRegressor\n The fitted radius neighbors regressor." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Test samples." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the target for the provided data\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=double\n Target values." - } - ], - "docstring": "Regression based on neighbors within a fixed radius.\n\nThe target is predicted by local interpolation of the targets\nassociated of the nearest neighbors in the training set.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.9\n\nParameters\n----------\nradius : float, default=1.0\n Range of parameter space to use by default for :meth:`radius_neighbors`\n queries.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. All points in each neighborhood\n are weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\n Uniform weights are used by default.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\np : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. 
See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\neffective_metric_ : str or callable\n The distance metric to use. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import RadiusNeighborsRegressor\n>>> neigh = RadiusNeighborsRegressor(radius=1.0)\n>>> neigh.fit(X, y)\nRadiusNeighborsRegressor(...)\n>>> print(neigh.predict([[1.5]]))\n[0.5]\n\nSee Also\n--------\nNearestNeighbors\nKNeighborsRegressor\nKNeighborsClassifier\nRadiusNeighborsClassifier\n\nNotes\n-----\nSee :ref:`Nearest Neighbors ` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm" - } - ], - "functions": [] - }, - { - "name": "sklearn.neighbors._unsupervised", - "imports": [ - "from _base import NeighborsBase", - "from _base import KNeighborsMixin", - "from _base import 
RadiusNeighborsMixin", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "NearestNeighbors", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "Number of neighbors to use by default for :meth:`kneighbors` queries." - }, - { - "name": "radius", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Range of parameter space to use by default for :meth:`radius_neighbors` queries." - }, - { - "name": "algorithm", - "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force." - }, - { - "name": "leaf_size", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem." - }, - { - "name": "metric", - "type": "Union[Callable, str]", - "hasDefault": true, - "default": "'minkowski'", - "limitation": null, - "ignored": false, - "docstring": "the distance metric to use for the tree. The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean metric. 
See the documentation of :class:`DistanceMetric` for a list of available metrics. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square during fit. X may be a :term:`sparse graph`, in which case only \"nonzero\" elements may be considered neighbors." - }, - { - "name": "p", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Parameter for the Minkowski metric from sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." - }, - { - "name": "metric_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Additional keyword arguments for the metric function." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "Not used, present for API consistency by convention." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the nearest neighbors estimator from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : NearestNeighbors\n The fitted nearest neighbors estimator." - } - ], - "docstring": "Unsupervised learner for implementing neighbor searches.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.9\n\nParameters\n----------\nn_neighbors : int, default=5\n Number of neighbors to use by default for :meth:`kneighbors` queries.\n\nradius : float, default=1.0\n Range of parameter space to use by default for :meth:`radius_neighbors`\n queries.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nmetric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. 
X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\np : int, default=2\n Parameter for the Minkowski metric from\n sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\neffective_metric_ : str\n Metric used to compute distances to neighbors.\n\neffective_metric_params_ : dict\n Parameters for the metric used to compute distances to neighbors.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.neighbors import NearestNeighbors\n>>> samples = [[0, 0, 2], [1, 0, 0], [0, 0, 1]]\n\n>>> neigh = NearestNeighbors(n_neighbors=2, radius=0.4)\n>>> neigh.fit(samples)\nNearestNeighbors(...)\n\n>>> neigh.kneighbors([[0, 0, 1.3]], 2, return_distance=False)\narray([[2, 0]]...)\n\n>>> nbrs = neigh.radius_neighbors(\n... [[0, 0, 1.3]], 0.4, return_distance=False\n... 
)\n>>> np.asarray(nbrs[0][0])\narray(2)\n\nSee Also\n--------\nKNeighborsClassifier\nRadiusNeighborsClassifier\nKNeighborsRegressor\nRadiusNeighborsRegressor\nBallTree\n\nNotes\n-----\nSee :ref:`Nearest Neighbors ` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm" - } - ], - "functions": [] - }, - { - "name": "sklearn.neighbors", - "imports": [ - "from _ball_tree import BallTree", - "from _kd_tree import KDTree", - "from _dist_metrics import DistanceMetric", - "from _graph import kneighbors_graph", - "from _graph import radius_neighbors_graph", - "from _graph import KNeighborsTransformer", - "from _graph import RadiusNeighborsTransformer", - "from _unsupervised import NearestNeighbors", - "from _classification import KNeighborsClassifier", - "from _classification import RadiusNeighborsClassifier", - "from _regression import KNeighborsRegressor", - "from _regression import RadiusNeighborsRegressor", - "from _nearest_centroid import NearestCentroid", - "from _kde import KernelDensity", - "from _lof import LocalOutlierFactor", - "from _nca import NeighborhoodComponentsAnalysis", - "from _base import VALID_METRICS", - "from _base import VALID_METRICS_SPARSE" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.neighbors.tests.test_ball_tree", - "imports": [ - "import itertools", - "import numpy as np", - "import pytest", - "from numpy.testing import assert_array_almost_equal", - "from sklearn.neighbors._ball_tree import BallTree", - "from sklearn.neighbors import DistanceMetric", - "from sklearn.utils import check_random_state", - "from sklearn.utils.validation import check_array", - "from sklearn.utils._testing import _convert_container" - ], - "classes": [], - "functions": [ - { - "name": "brute_force_neighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_ball_tree_query_metrics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_query_haversine", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_array_object_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that we do not accept object dtype array." - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_dist_metrics", - "imports": [ - "import itertools", - "import pickle", - "import numpy as np", - "from numpy.testing import assert_array_almost_equal", - "import pytest", - "from scipy.spatial.distance import cdist", - "from sklearn.neighbors import DistanceMetric", - "from sklearn.neighbors import BallTree", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils.fixes import sp_version", - "from sklearn.utils.fixes import parse_version" - ], - "classes": [], - "functions": [ - { - "name": "dist_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cdist", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cdist_bool_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cdist", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_cdist_bool", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pdist", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_pdist_bool_metrics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pdist", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pdist_bool", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle_bool_metrics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_haversine_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pyfunc_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bad_pyfunc_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_data_size", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_graph", - "imports": [ - "import numpy as np", - "from sklearn.metrics import euclidean_distances", - "from sklearn.neighbors import KNeighborsTransformer", - "from sklearn.neighbors import RadiusNeighborsTransformer", - "from sklearn.neighbors._base import _is_sorted_by_data" - ], - "classes": [], - "functions": [ - { - "name": "test_transformer_result", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "_has_explicit_diagonal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return True if the diagonal is explicitly stored" - }, - { - "name": "test_explicit_diagonal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_kde", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_raises", - "from sklearn.neighbors import KernelDensity", - "from sklearn.neighbors import KDTree", - "from sklearn.neighbors import NearestNeighbors", - "from sklearn.neighbors._ball_tree import kernel_norm", - "from sklearn.pipeline import make_pipeline", - "from sklearn.datasets import make_blobs", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.exceptions import NotFittedError", - "import joblib" - ], - "classes": [], - "functions": [ - { - "name": "compute_kernel_slow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_density", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_density_sampling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kde_algorithm_metric_choice", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kde_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_kde_badargs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kde_pipeline_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kde_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_is_fitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_kd_tree", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.neighbors._kd_tree import KDTree" - ], - "classes": [], - "functions": [ - { - "name": "test_array_object_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that we do not accept object dtype array." 
- } - ] - }, - { - "name": "sklearn.neighbors.tests.test_lof", - "imports": [ - "from math import sqrt", - "import numpy as np", - "from sklearn import neighbors", - "import pytest", - "from numpy.testing import assert_array_equal", - "from sklearn import metrics", - "from sklearn.metrics import roc_auc_score", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils.estimator_checks import check_estimator", - "from sklearn.utils.estimator_checks import check_outlier_corruption", - "from sklearn.datasets import load_iris" - ], - "classes": [], - "functions": [ - { - "name": "test_lof", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lof_performance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lof_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lof_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Tests LOF with a distance matrix." 
- }, - { - "name": "test_n_neighbors_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_contamination", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_novelty_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_novelty_training_scores", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hasattr_prediction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_novelty_true_common_tests", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predicted_outlier_number", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_nca", - "imports": [ - "import pytest", - "import re", - "import numpy as np", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_array_almost_equal", - "from scipy.optimize import check_grad", - "from sklearn import clone", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_blobs", - "from 
sklearn.neighbors import NeighborhoodComponentsAnalysis", - "from sklearn.metrics import pairwise_distances" - ], - "classes": [], - "functions": [ - { - "name": "test_simple_example", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test on a simple example.\n\nPuts four points in the input space where the opposite labels points are\nnext to each other. After transform the samples from the same class\nshould be next to each other." - }, - { - "name": "test_toy_example_collapse_points", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test on a toy example of three points that should collapse\n\nWe build a simple example: two points from the same class and a point from\na different class in the middle of them. On this simple example, the new\n(transformed) points should all collapse into one single point. Indeed, the\nobjective is 2/(1 + exp(d/2)), with d the euclidean distance between the\ntwo samples from the same class. This is maximized for d=0 (because d>=0),\nwith an objective equal to 1 (loss=-1.)." - }, - { - "name": "test_finite_differences", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test gradient of loss function\n\nAssert that the gradient is almost equal to its finite differences\napproximation." 
- }, - { - "name": "test_params_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transformation_dimensions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_components", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_init_transformation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_auto_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_effectiveness", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_singleton_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callback", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_expected_transformation_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that the 
transformation has the expected shape." - }, - { - "name": "test_convergence_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parameters_valid_types", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_nearest_centroid", - "imports": [ - "import numpy as np", - "from scipy import sparse as sp", - "from numpy.testing import assert_array_equal", - "from sklearn.neighbors import NearestCentroid", - "from sklearn import datasets", - "from sklearn.utils._testing import assert_raises", - "import pickle" - ], - "classes": [], - "functions": [ - { - "name": "test_classification_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iris_shrinkage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shrinkage_correct", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shrinkage_threshold_decoded_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_translated_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_manhattan_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_features_zero_var", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_neighbors", - "imports": [ - "from itertools import product", - "import pytest", - "import numpy as np", - "from scipy.sparse import bsr_matrix", - "from scipy.sparse import coo_matrix", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import dok_matrix", - "from scipy.sparse import lil_matrix", - "from scipy.sparse import issparse", - "from sklearn import metrics", - "from sklearn import neighbors", - "from sklearn import datasets", - "from sklearn.base import clone", - "from sklearn.exceptions import DataConversionWarning", - "from sklearn.exceptions import EfficiencyWarning", - "from sklearn.exceptions import NotFittedError", - "from sklearn.metrics.pairwise import pairwise_distances", - "from sklearn.model_selection import cross_val_score", - "from sklearn.model_selection import train_test_split", - "from sklearn.neighbors import VALID_METRICS_SPARSE", - "from sklearn.neighbors import VALID_METRICS", - "from sklearn.neighbors._base import _is_sorted_by_data", - "from sklearn.neighbors._base import _check_precomputed", - "from sklearn.pipeline import make_pipeline", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils.validation import 
check_random_state", - "from sklearn.utils.fixes import sp_version", - "from sklearn.utils.fixes import parse_version", - "import joblib" - ], - "classes": [], - "functions": [ - { - "name": "_weight_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Weight function to replace lambda d: d ** -2.\nThe lambda function is not valid because:\nif d==0 then 0^-2 is not valid. " - }, - { - "name": "test_unsupervised_kneighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unsupervised_inputs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_neighbors_datatype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_not_fitted_error_gets_raised", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Tests unsupervised NearestNeighbors with a distance matrix." 
- }, - { - "name": "test_precomputed_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precomputed_sparse_knn", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precomputed_sparse_radius", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_is_sorted_by_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precomputed_sparse_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precomputed_cross_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unsupervised_radius_neighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_classifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_classifier_float_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_classifier_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_classifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_radius_neighbors_classifier_when_no_neighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_classifier_outlier_labeling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_classifier_zero_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbors_regressors_zero_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_boundary_handling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test whether points lying on boundary are handled consistently\n\nAlso ensures that even with only one query point, an object array\nis returned rather than a 2d array." 
- }, - { - "name": "test_radius_neighbors_returns_array_of_objects", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_sort_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_RadiusNeighborsClassifier_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_classifier_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_KNeighborsClassifier_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_KNeighborsRegressor_multioutput_uniform_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_regressor_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_RadiusNeighborsRegressor_multioutput_with_uniform_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_RadiusNeighborsRegressor_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_regressor_sparse", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbors_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbors_digits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_graph_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_graph_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbors_badargs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbors_metrics", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_callable_metric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_valid_brute_metric_for_auto_algorithm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_metric_params_interface", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_sparse_ball_kd_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_non_euclidean_kneighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_object_arrays", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_k_and_radius_neighbors_train_is_not_query", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_k_and_radius_neighbors_X_None", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_k_and_radius_neighbors_duplicates", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_include_self_neighbors_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_same_knn_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_same_radius_neighbors_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_knn_forcing_backend", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dtype_convert", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_metric_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_boolean_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_radius_neighbors_predict_proba", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_with_nearest_neighbors_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_auto_algorithm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_neighbors_pipeline", - "imports": [ - "import numpy as np", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.cluster.tests.common import generate_clustered_data", - "from sklearn.datasets import make_blobs", - "from sklearn.pipeline import make_pipeline", - "from sklearn.base import clone", - "from sklearn.neighbors import KNeighborsTransformer", - "from sklearn.neighbors import RadiusNeighborsTransformer", - "from sklearn.cluster import DBSCAN", - "from sklearn.cluster import SpectralClustering", - "from sklearn.neighbors import KNeighborsRegressor", - "from sklearn.neighbors import RadiusNeighborsRegressor", - "from sklearn.neighbors import LocalOutlierFactor", - "from sklearn.manifold import SpectralEmbedding", - "from sklearn.manifold import Isomap", - "from sklearn.manifold import TSNE" - ], - "classes": [], - "functions": [ - { - "name": "test_spectral_clustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_spectral_embedding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dbscan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isomap", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tsne", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lof_novelty_false", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lof_novelty_true", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kneighbors_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_neighbors_tree", - "imports": [ - "import pickle", - "import itertools", - "import numpy as np", - "import pytest", - "from sklearn.neighbors import DistanceMetric", - "from sklearn.neighbors._ball_tree import BallTree", - "from sklearn.neighbors._ball_tree import kernel_norm", - "from sklearn.neighbors._ball_tree import DTYPE", - "from sklearn.neighbors._ball_tree import ITYPE", - "from sklearn.neighbors._ball_tree import NeighborsHeap as NeighborsHeapBT", - "from sklearn.neighbors._ball_tree import simultaneous_sort as simultaneous_sort_bt", - "from sklearn.neighbors._ball_tree import nodeheap_sort as nodeheap_sort_bt", - "from sklearn.neighbors._kd_tree import KDTree", - "from sklearn.neighbors._kd_tree import NeighborsHeap as NeighborsHeapKDT", - "from sklearn.neighbors._kd_tree import simultaneous_sort as simultaneous_sort_kdt", - "from sklearn.neighbors._kd_tree import nodeheap_sort as nodeheap_sort_kdt", - "from sklearn.utils import check_random_state", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_allclose", - "from scipy.stats import gaussian_kde" - ], - "classes": [], - "functions": [ - { - "name": "dist_func", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "compute_kernel_slow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "brute_force_neighbors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_density", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbor_tree_query_radius", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbor_tree_query_radius_distance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbor_tree_two_point", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_neighbors_heap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_node_heap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_simultaneous_sort", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_kde", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nn_tree_query", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests.test_quad_tree", - "imports": [ - 
"import pickle", - "import numpy as np", - "import pytest", - "from sklearn.neighbors._quad_tree import _QuadTree", - "from sklearn.utils import check_random_state" - ], - "classes": [], - "functions": [ - { - "name": "test_quadtree_boundary_computation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quadtree_similar_point", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quad_tree_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_qt_insert_duplicate", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_summarize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neighbors.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.neural_network._base", - "imports": [ - "import numpy as np", - "from scipy.special import expit as logistic_sigmoid", - "from scipy.special import xlogy" - ], - "classes": [], - "functions": [ - { - "name": "inplace_identity", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data, where n_samples is the number of samples and n_features is the number of features." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Simply leave the input array unchanged.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Data, where n_samples is the number of samples\n and n_features is the number of features." 
- }, - { - "name": "inplace_logistic", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the logistic function inplace.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data." - }, - { - "name": "inplace_tanh", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the hyperbolic tan function inplace.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data." - }, - { - "name": "inplace_relu", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the rectified linear unit function inplace.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data." - }, - { - "name": "inplace_softmax", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the K-way softmax function inplace.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data." 
- }, - { - "name": "inplace_identity_derivative", - "decorators": [], - "parameters": [ - { - "name": "Z", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data which was output from the identity activation function during the forward pass." - }, - { - "name": "delta", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The backpropagated error signal to be modified inplace." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the derivative of the identity function: do nothing.\n\nParameters\n----------\nZ : {array-like, sparse matrix}, shape (n_samples, n_features)\n The data which was output from the identity activation function during\n the forward pass.\n\ndelta : {array-like}, shape (n_samples, n_features)\n The backpropagated error signal to be modified inplace." - }, - { - "name": "inplace_logistic_derivative", - "decorators": [], - "parameters": [ - { - "name": "Z", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data which was output from the logistic activation function during the forward pass." - }, - { - "name": "delta", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The backpropagated error signal to be modified inplace." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the derivative of the logistic sigmoid function.\n\nIt exploits the fact that the derivative is a simple function of the output\nvalue from logistic function.\n\nParameters\n----------\nZ : {array-like, sparse matrix}, shape (n_samples, n_features)\n The data which was output from the logistic activation function during\n the forward pass.\n\ndelta : {array-like}, shape (n_samples, n_features)\n The backpropagated error signal to be modified inplace." - }, - { - "name": "inplace_tanh_derivative", - "decorators": [], - "parameters": [ - { - "name": "Z", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data which was output from the hyperbolic tangent activation function during the forward pass." - }, - { - "name": "delta", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The backpropagated error signal to be modified inplace." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the derivative of the hyperbolic tanh function.\n\nIt exploits the fact that the derivative is a simple function of the output\nvalue from hyperbolic tangent.\n\nParameters\n----------\nZ : {array-like, sparse matrix}, shape (n_samples, n_features)\n The data which was output from the hyperbolic tangent activation\n function during the forward pass.\n\ndelta : {array-like}, shape (n_samples, n_features)\n The backpropagated error signal to be modified inplace." - }, - { - "name": "inplace_relu_derivative", - "decorators": [], - "parameters": [ - { - "name": "Z", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data which was output from the rectified linear units activation function during the forward pass." 
- }, - { - "name": "delta", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The backpropagated error signal to be modified inplace." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the derivative of the relu function.\n\nIt exploits the fact that the derivative is a simple function of the output\nvalue from rectified linear units activation function.\n\nParameters\n----------\nZ : {array-like, sparse matrix}, shape (n_samples, n_features)\n The data which was output from the rectified linear units activation\n function during the forward pass.\n\ndelta : {array-like}, shape (n_samples, n_features)\n The backpropagated error signal to be modified inplace." - }, - { - "name": "squared_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) values." - }, - { - "name": "y_pred", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted values, as returned by a regression estimator." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the squared loss for regression.\n\nParameters\n----------\ny_true : array-like or label indicator matrix\n Ground truth (correct) values.\n\ny_pred : array-like or label indicator matrix\n Predicted values, as returned by a regression estimator.\n\nReturns\n-------\nloss : float\n The degree to which the samples are correctly predicted." - }, - { - "name": "log_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) labels." 
- }, - { - "name": "y_prob", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted probabilities, as returned by a classifier's predict_proba method." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute Logistic loss for classification.\n\nParameters\n----------\ny_true : array-like or label indicator matrix\n Ground truth (correct) labels.\n\ny_prob : array-like of float, shape = (n_samples, n_classes)\n Predicted probabilities, as returned by a classifier's\n predict_proba method.\n\nReturns\n-------\nloss : float\n The degree to which the samples are correctly predicted." - }, - { - "name": "binary_log_loss", - "decorators": [], - "parameters": [ - { - "name": "y_true", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ground truth (correct) labels." - }, - { - "name": "y_prob", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted probabilities, as returned by a classifier's predict_proba method." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute binary logistic loss for classification.\n\nThis is identical to log_loss in binary classification case,\nbut is kept for its use in multilabel case.\n\nParameters\n----------\ny_true : array-like or label indicator matrix\n Ground truth (correct) labels.\n\ny_prob : array-like of float, shape = (n_samples, 1)\n Predicted probabilities, as returned by a classifier's\n predict_proba method.\n\nReturns\n-------\nloss : float\n The degree to which the samples are correctly predicted." 
- } - ] - }, - { - "name": "sklearn.neural_network._multilayer_perceptron", - "imports": [ - "import numpy as np", - "from abc import ABCMeta", - "from abc import abstractmethod", - "import warnings", - "import scipy.optimize", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import is_classifier", - "from _base import ACTIVATIONS", - "from _base import DERIVATIVES", - "from _base import LOSS_FUNCTIONS", - "from _stochastic_optimizers import SGDOptimizer", - "from _stochastic_optimizers import AdamOptimizer", - "from model_selection import train_test_split", - "from preprocessing import LabelBinarizer", - "from utils import gen_batches", - "from utils import check_random_state", - "from utils import shuffle", - "from utils import _safe_indexing", - "from utils import column_or_1d", - "from exceptions import ConvergenceWarning", - "from utils.extmath import safe_sparse_dot", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import _check_partial_fit_first_call", - "from utils.multiclass import unique_labels", - "from utils.multiclass import type_of_target", - "from utils.optimize import _check_optimize_result" - ], - "classes": [ - { - "name": "BaseMultilayerPerceptron", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_unpack", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Extract the coefficients and intercepts from packed_parameters." 
- }, - { - "name": "_forward_pass", - "decorators": [], - "parameters": [ - { - "name": "activations", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element of the list holds the values of the ith layer." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform a forward pass on the network by computing the values\nof the neurons in the hidden layers and the output layer.\n\nParameters\n----------\nactivations : list, length = n_layers - 1\n The ith element of the list holds the values of the ith layer." - }, - { - "name": "_forward_pass_fast", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the trained model\n\nThis is the same as _forward_pass but does not record the activations\nof all layers and only returns the last layer's activation.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The decision function of the samples for each class in the model." - }, - { - "name": "_compute_loss_grad", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the gradient of loss with respect to coefs and intercept for\nspecified layer.\n\nThis function does backpropagation for the specified one layer." - }, - { - "name": "_loss_grad_lbfgs", - "decorators": [], - "parameters": [ - { - "name": "packed_coef_inter", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A vector comprising the flattened coefficients and intercepts." 
- }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." - }, - { - "name": "activations", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element of the list holds the values of the ith layer." - }, - { - "name": "deltas", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element of the list holds the difference between the activations of the i + 1 layer and the backpropagated error. More specifically, deltas are gradients of loss with respect to z in each layer, where z = wx + b is the value of a particular layer before passing through the activation function" - }, - { - "name": "coef_grads", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element contains the amount of change used to update the coefficient parameters of the ith layer in an iteration." - }, - { - "name": "intercept_grads", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element contains the amount of change used to update the intercept parameters of the ith layer in an iteration." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the MLP loss function and its corresponding derivatives\nwith respect to the different parameters given in the initialization.\n\nReturned gradients are packed in a single vector so it can be used\nin lbfgs\n\nParameters\n----------\npacked_coef_inter : ndarray\n A vector comprising the flattened coefficients and intercepts.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\ny : ndarray of shape (n_samples,)\n The target values.\n\nactivations : list, length = n_layers - 1\n The ith element of the list holds the values of the ith layer.\n\ndeltas : list, length = n_layers - 1\n The ith element of the list holds the difference between the\n activations of the i + 1 layer and the backpropagated error.\n More specifically, deltas are gradients of loss with respect to z\n in each layer, where z = wx + b is the value of a particular layer\n before passing through the activation function\n\ncoef_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n coefficient parameters of the ith layer in an iteration.\n\nintercept_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n intercept parameters of the ith layer in an iteration.\n\nReturns\n-------\nloss : float\ngrad : array-like, shape (number of nodes of all layers,)" - }, - { - "name": "_backprop", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." 
- }, - { - "name": "activations", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element of the list holds the values of the ith layer." - }, - { - "name": "deltas", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element of the list holds the difference between the activations of the i + 1 layer and the backpropagated error. More specifically, deltas are gradients of loss with respect to z in each layer, where z = wx + b is the value of a particular layer before passing through the activation function" - }, - { - "name": "coef_grads", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element contains the amount of change used to update the coefficient parameters of the ith layer in an iteration." - }, - { - "name": "intercept_grads", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element contains the amount of change used to update the intercept parameters of the ith layer in an iteration." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the MLP loss function and its corresponding derivatives\nwith respect to each parameter: weights and bias vectors.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\ny : ndarray of shape (n_samples,)\n The target values.\n\nactivations : list, length = n_layers - 1\n The ith element of the list holds the values of the ith layer.\n\ndeltas : list, length = n_layers - 1\n The ith element of the list holds the difference between the\n activations of the i + 1 layer and the backpropagated error.\n More specifically, deltas are gradients of loss with respect to z\n in each layer, where z = wx + b is the value of a particular layer\n before passing through the activation function\n\ncoef_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n coefficient parameters of the ith layer in an iteration.\n\nintercept_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n intercept parameters of the ith layer in an iteration.\n\nReturns\n-------\nloss : float\ncoef_grads : list, length = n_layers - 1\nintercept_grads : list, length = n_layers - 1" - }, - { - "name": "_initialize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_init_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_hyperparameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit_lbfgs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "_fit_stochastic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_update_no_improvement_count", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels in classification, real numbers in regression)." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to data matrix X and target(s) y.\n\nParameters\n----------\nX : ndarray or sparse matrix of shape (n_samples, n_features)\n The input data.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels in classification, real numbers in\n regression).\n\nReturns\n-------\nself : returns a trained MLP model." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - }, - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Update the model with a single iteration over the given data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\ny : ndarray of shape (n_samples,)\n The target values.\n\nReturns\n-------\nself : returns a trained MLP model." 
- }, - { - "name": "_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for MLP classification and regression.\n\nWarning: This class should not be used directly.\nUse derived classes instead.\n\n.. versionadded:: 0.18" - }, - { - "name": "MLPClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "hidden_layer_sizes", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element represents the number of neurons in the ith hidden layer." - }, - { - "name": "activation", - "type": "Literal['identity', 'logistic', 'tanh', 'relu']", - "hasDefault": true, - "default": "'relu'", - "limitation": null, - "ignored": false, - "docstring": "Activation function for the hidden layer. - 'identity', no-op activation, useful to implement linear bottleneck, returns f(x) = x - 'logistic', the logistic sigmoid function, returns f(x) = 1 / (1 + exp(-x)). - 'tanh', the hyperbolic tan function, returns f(x) = tanh(x). - 'relu', the rectified linear unit function, returns f(x) = max(0, x)" - }, - { - "name": "solver", - "type": "Literal['lbfgs', 'sgd', 'adam']", - "hasDefault": true, - "default": "'adam'", - "limitation": null, - "ignored": false, - "docstring": "The solver for weight optimization. - 'lbfgs' is an optimizer in the family of quasi-Newton methods. - 'sgd' refers to stochastic gradient descent. - 'adam' refers to a stochastic gradient-based optimizer proposed by Kingma, Diederik, and Jimmy Ba Note: The default solver 'adam' works pretty well on relatively large datasets (with thousands of training samples or more) in terms of both training time and validation score. For small datasets, however, 'lbfgs' can converge faster and perform better." 
- }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L2 penalty (regularization term) parameter." - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Size of minibatches for stochastic optimizers. If the solver is 'lbfgs', the classifier will not use minibatch. When set to \"auto\", `batch_size=min(200, n_samples)`" - }, - { - "name": "learning_rate", - "type": "Literal['constant', 'invscaling', 'adaptive']", - "hasDefault": true, - "default": "'constant'", - "limitation": null, - "ignored": false, - "docstring": "Learning rate schedule for weight updates. - 'constant' is a constant learning rate given by 'learning_rate_init'. - 'invscaling' gradually decreases the learning rate at each time step 't' using an inverse scaling exponent of 'power_t'. effective_learning_rate = learning_rate_init / pow(t, power_t) - 'adaptive' keeps the learning rate constant to 'learning_rate_init' as long as training loss keeps decreasing. Each time two consecutive epochs fail to decrease training loss by at least tol, or fail to increase validation score by at least tol if 'early_stopping' is on, the current learning rate is divided by 5. Only used when ``solver='sgd'``." - }, - { - "name": "learning_rate_init", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The initial learning rate used. It controls the step-size in updating the weights. Only used when solver='sgd' or 'adam'." - }, - { - "name": "power_t", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The exponent for inverse scaling learning rate. It is used in updating effective learning rate when the learning_rate is set to 'invscaling'. Only used when solver='sgd'." 
- }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations. The solver iterates until convergence (determined by 'tol') or this number of iterations. For stochastic solvers ('sgd', 'adam'), note that this determines the number of epochs (how many times each data point will be used), not the number of gradient steps." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle samples in each iteration. Only used when solver='sgd' or 'adam'." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for weights and bias initialization, train-test split if early stopping is used, and batch sampling when solver='sgd' or 'adam'. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for the optimization. When the loss or score is not improving by at least ``tol`` for ``n_iter_no_change`` consecutive iterations, unless ``learning_rate`` is set to 'adaptive', convergence is considered to be reached and training stops." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to print progress messages to stdout." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `." 
- }, - { - "name": "momentum", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Momentum for gradient descent update. Should be between 0 and 1. Only used when solver='sgd'." - }, - { - "name": "nesterovs_momentum", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use Nesterov's momentum. Only used when solver='sgd' and momentum > 0." - }, - { - "name": "early_stopping", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use early stopping to terminate training when validation score is not improving. If set to true, it will automatically set aside 10% of training data as validation and terminate training when validation score is not improving by at least tol for ``n_iter_no_change`` consecutive epochs. The split is stratified, except in a multilabel setting. Only effective when solver='sgd' or 'adam'" - }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True" - }, - { - "name": "beta_1", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Exponential decay rate for estimates of first moment vector in adam, should be in [0, 1). Only used when solver='adam'" - }, - { - "name": "beta_2", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Exponential decay rate for estimates of second moment vector in adam, should be in [0, 1). 
Only used when solver='adam'" - }, - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "1e-8", - "limitation": null, - "ignored": false, - "docstring": "Value for numerical stability in adam. Only used when solver='adam'" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of epochs to not meet ``tol`` improvement. Only effective when solver='sgd' or 'adam' .. versionadded:: 0.20" - }, - { - "name": "max_fun", - "type": "int", - "hasDefault": true, - "default": "15000", - "limitation": null, - "ignored": false, - "docstring": "Only used when solver='lbfgs'. Maximum number of loss function calls. The solver iterates until convergence (determined by 'tol'), number of iterations reaches max_iter, or this number of loss function calls. Note that number of loss function calls will be greater than or equal to the number of iterations for the `MLPClassifier`. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the multi-layer perceptron classifier\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\ny : ndarray, shape (n_samples,) or (n_samples, n_classes)\n The predicted classes." 
- }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values." - }, - { - "name": "classes", - "type": "Array", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Classes across all calls to partial_fit. Can be obtained via `np.unique(y_all)`, where y_all is the target vector of the entire dataset. This argument is required for the first call to partial_fit and can be omitted in the subsequent calls. Note that y doesn't need to contain all labels in `classes`." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Update the model with a single iteration over the given data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\ny : array-like of shape (n_samples,)\n The target values.\n\nclasses : array of shape (n_classes,), default=None\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\nReturns\n-------\nself : returns a trained MLP model." - }, - { - "name": "_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the log of probability estimates.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\nlog_y_prob : ndarray of shape (n_samples, n_classes)\n The predicted log-probability of the sample for each class\n in the model, where classes are ordered as they are in\n `self.classes_`. Equivalent to log(predict_proba(X))" - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Probability estimates.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\ny_prob : ndarray of shape (n_samples, n_classes)\n The predicted probability of the sample for each class in the\n model, where classes are ordered as they are in `self.classes_`." - } - ], - "docstring": "Multi-layer Perceptron classifier.\n\nThis model optimizes the log-loss function using LBFGS or stochastic\ngradient descent.\n\n.. 
versionadded:: 0.18\n\nParameters\n----------\nhidden_layer_sizes : tuple, length = n_layers - 2, default=(100,)\n The ith element represents the number of neurons in the ith\n hidden layer.\n\nactivation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'\n Activation function for the hidden layer.\n\n - 'identity', no-op activation, useful to implement linear bottleneck,\n returns f(x) = x\n\n - 'logistic', the logistic sigmoid function,\n returns f(x) = 1 / (1 + exp(-x)).\n\n - 'tanh', the hyperbolic tan function,\n returns f(x) = tanh(x).\n\n - 'relu', the rectified linear unit function,\n returns f(x) = max(0, x)\n\nsolver : {'lbfgs', 'sgd', 'adam'}, default='adam'\n The solver for weight optimization.\n\n - 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n - 'sgd' refers to stochastic gradient descent.\n\n - 'adam' refers to a stochastic gradient-based optimizer proposed\n by Kingma, Diederik, and Jimmy Ba\n\n Note: The default solver 'adam' works pretty well on relatively\n large datasets (with thousands of training samples or more) in terms of\n both training time and validation score.\n For small datasets, however, 'lbfgs' can converge faster and perform\n better.\n\nalpha : float, default=0.0001\n L2 penalty (regularization term) parameter.\n\nbatch_size : int, default='auto'\n Size of minibatches for stochastic optimizers.\n If the solver is 'lbfgs', the classifier will not use minibatch.\n When set to \"auto\", `batch_size=min(200, n_samples)`\n\nlearning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'\n Learning rate schedule for weight updates.\n\n - 'constant' is a constant learning rate given by\n 'learning_rate_init'.\n\n - 'invscaling' gradually decreases the learning rate at each\n time step 't' using an inverse scaling exponent of 'power_t'.\n effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n - 'adaptive' keeps the learning rate constant to\n 'learning_rate_init' as long as training loss 
keeps decreasing.\n Each time two consecutive epochs fail to decrease training loss by at\n least tol, or fail to increase validation score by at least tol if\n 'early_stopping' is on, the current learning rate is divided by 5.\n\n Only used when ``solver='sgd'``.\n\nlearning_rate_init : double, default=0.001\n The initial learning rate used. It controls the step-size\n in updating the weights. Only used when solver='sgd' or 'adam'.\n\npower_t : double, default=0.5\n The exponent for inverse scaling learning rate.\n It is used in updating effective learning rate when the learning_rate\n is set to 'invscaling'. Only used when solver='sgd'.\n\nmax_iter : int, default=200\n Maximum number of iterations. The solver iterates until convergence\n (determined by 'tol') or this number of iterations. For stochastic\n solvers ('sgd', 'adam'), note that this determines the number of epochs\n (how many times each data point will be used), not the number of\n gradient steps.\n\nshuffle : bool, default=True\n Whether to shuffle samples in each iteration. Only used when\n solver='sgd' or 'adam'.\n\nrandom_state : int, RandomState instance, default=None\n Determines random number generation for weights and bias\n initialization, train-test split if early stopping is used, and batch\n sampling when solver='sgd' or 'adam'.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\ntol : float, default=1e-4\n Tolerance for the optimization. When the loss or score is not improving\n by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\n unless ``learning_rate`` is set to 'adaptive', convergence is\n considered to be reached and training stops.\n\nverbose : bool, default=False\n Whether to print progress messages to stdout.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous\n call to fit as initialization, otherwise, just erase the\n previous solution. 
See :term:`the Glossary `.\n\nmomentum : float, default=0.9\n Momentum for gradient descent update. Should be between 0 and 1. Only\n used when solver='sgd'.\n\nnesterovs_momentum : bool, default=True\n Whether to use Nesterov's momentum. Only used when solver='sgd' and\n momentum > 0.\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to true, it will automatically set\n aside 10% of training data as validation and terminate training when\n validation score is not improving by at least tol for\n ``n_iter_no_change`` consecutive epochs. The split is stratified,\n except in a multilabel setting.\n Only effective when solver='sgd' or 'adam'\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True\n\nbeta_1 : float, default=0.9\n Exponential decay rate for estimates of first moment vector in adam,\n should be in [0, 1). Only used when solver='adam'\n\nbeta_2 : float, default=0.999\n Exponential decay rate for estimates of second moment vector in adam,\n should be in [0, 1). Only used when solver='adam'\n\nepsilon : float, default=1e-8\n Value for numerical stability in adam. Only used when solver='adam'\n\nn_iter_no_change : int, default=10\n Maximum number of epochs to not meet ``tol`` improvement.\n Only effective when solver='sgd' or 'adam'\n\n .. versionadded:: 0.20\n\nmax_fun : int, default=15000\n Only used when solver='lbfgs'. Maximum number of loss function calls.\n The solver iterates until convergence (determined by 'tol'), number\n of iterations reaches max_iter, or this number of loss function calls.\n Note that number of loss function calls will be greater than or equal\n to the number of iterations for the `MLPClassifier`.\n\n .. 
versionadded:: 0.22\n\nAttributes\n----------\nclasses_ : ndarray or list of ndarray of shape (n_classes,)\n Class labels for each output.\n\nloss_ : float\n The current loss computed with the loss function.\n\nbest_loss_ : float\n The minimum loss reached by the solver throughout fitting.\n\nloss_curve_ : list of shape (`n_iter_`,)\n The ith element in the list represents the loss at the ith iteration.\n\nt_ : int\n The number of training samples seen by the solver during fitting.\n\ncoefs_ : list of shape (n_layers - 1,)\n The ith element in the list represents the weight matrix corresponding\n to layer i.\n\nintercepts_ : list of shape (n_layers - 1,)\n The ith element in the list represents the bias vector corresponding to\n layer i + 1.\n\nn_iter_ : int\n The number of iterations the solver has ran.\n\nn_layers_ : int\n Number of layers.\n\nn_outputs_ : int\n Number of outputs.\n\nout_activation_ : str\n Name of the output activation function.\n\nExamples\n--------\n>>> from sklearn.neural_network import MLPClassifier\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_classification(n_samples=100, random_state=1)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,\n... 
random_state=1)\n>>> clf = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)\n>>> clf.predict_proba(X_test[:1])\narray([[0.038..., 0.961...]])\n>>> clf.predict(X_test[:5, :])\narray([1, 0, 1, 0, 1])\n>>> clf.score(X_test, y_test)\n0.8...\n\nNotes\n-----\nMLPClassifier trains iteratively since at each time step\nthe partial derivatives of the loss function with respect to the model\nparameters are computed to update the parameters.\n\nIt can also have a regularization term added to the loss function\nthat shrinks model parameters to prevent overfitting.\n\nThis implementation works with data represented as dense numpy arrays or\nsparse scipy arrays of floating point values.\n\nReferences\n----------\nHinton, Geoffrey E.\n \"Connectionist learning procedures.\" Artificial intelligence 40.1\n (1989): 185-234.\n\nGlorot, Xavier, and Yoshua Bengio. \"Understanding the difficulty of\n training deep feedforward neural networks.\" International Conference\n on Artificial Intelligence and Statistics. 2010.\n\nHe, Kaiming, et al. \"Delving deep into rectifiers: Surpassing human-level\n performance on imagenet classification.\" arXiv preprint\n arXiv:1502.01852 (2015).\n\nKingma, Diederik, and Jimmy Ba. \"Adam: A method for stochastic\n optimization.\" arXiv preprint arXiv:1412.6980 (2014)." - }, - { - "name": "MLPRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "hidden_layer_sizes", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The ith element represents the number of neurons in the ith hidden layer." - }, - { - "name": "activation", - "type": "Literal['identity', 'logistic', 'tanh', 'relu']", - "hasDefault": true, - "default": "'relu'", - "limitation": null, - "ignored": false, - "docstring": "Activation function for the hidden layer. 
- 'identity', no-op activation, useful to implement linear bottleneck, returns f(x) = x - 'logistic', the logistic sigmoid function, returns f(x) = 1 / (1 + exp(-x)). - 'tanh', the hyperbolic tan function, returns f(x) = tanh(x). - 'relu', the rectified linear unit function, returns f(x) = max(0, x)" - }, - { - "name": "solver", - "type": "Literal['lbfgs', 'sgd', 'adam']", - "hasDefault": true, - "default": "'adam'", - "limitation": null, - "ignored": false, - "docstring": "The solver for weight optimization. - 'lbfgs' is an optimizer in the family of quasi-Newton methods. - 'sgd' refers to stochastic gradient descent. - 'adam' refers to a stochastic gradient-based optimizer proposed by Kingma, Diederik, and Jimmy Ba Note: The default solver 'adam' works pretty well on relatively large datasets (with thousands of training samples or more) in terms of both training time and validation score. For small datasets, however, 'lbfgs' can converge faster and perform better." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "L2 penalty (regularization term) parameter." - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Size of minibatches for stochastic optimizers. If the solver is 'lbfgs', the classifier will not use minibatch. When set to \"auto\", `batch_size=min(200, n_samples)`" - }, - { - "name": "learning_rate", - "type": "Literal['constant', 'invscaling', 'adaptive']", - "hasDefault": true, - "default": "'constant'", - "limitation": null, - "ignored": false, - "docstring": "Learning rate schedule for weight updates. - 'constant' is a constant learning rate given by 'learning_rate_init'. - 'invscaling' gradually decreases the learning rate ``learning_rate_`` at each time step 't' using an inverse scaling exponent of 'power_t'. 
effective_learning_rate = learning_rate_init / pow(t, power_t) - 'adaptive' keeps the learning rate constant to 'learning_rate_init' as long as training loss keeps decreasing. Each time two consecutive epochs fail to decrease training loss by at least tol, or fail to increase validation score by at least tol if 'early_stopping' is on, the current learning rate is divided by 5. Only used when solver='sgd'." - }, - { - "name": "learning_rate_init", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The initial learning rate used. It controls the step-size in updating the weights. Only used when solver='sgd' or 'adam'." - }, - { - "name": "power_t", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The exponent for inverse scaling learning rate. It is used in updating effective learning rate when the learning_rate is set to 'invscaling'. Only used when solver='sgd'." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations. The solver iterates until convergence (determined by 'tol') or this number of iterations. For stochastic solvers ('sgd', 'adam'), note that this determines the number of epochs (how many times each data point will be used), not the number of gradient steps." - }, - { - "name": "shuffle", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to shuffle samples in each iteration. Only used when solver='sgd' or 'adam'." - }, - { - "name": "random_state", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for weights and bias initialization, train-test split if early stopping is used, and batch sampling when solver='sgd' or 'adam'. 
Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for the optimization. When the loss or score is not improving by at least ``tol`` for ``n_iter_no_change`` consecutive iterations, unless ``learning_rate`` is set to 'adaptive', convergence is considered to be reached and training stops." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to print progress messages to stdout." - }, - { - "name": "warm_start", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `." - }, - { - "name": "momentum", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Momentum for gradient descent update. Should be between 0 and 1. Only used when solver='sgd'." - }, - { - "name": "nesterovs_momentum", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use Nesterov's momentum. Only used when solver='sgd' and momentum > 0." - }, - { - "name": "early_stopping", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to use early stopping to terminate training when validation score is not improving. If set to true, it will automatically set aside 10% of training data as validation and terminate training when validation score is not improving by at least ``tol`` for ``n_iter_no_change`` consecutive epochs. 
Only effective when solver='sgd' or 'adam'" - }, - { - "name": "validation_fraction", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True" - }, - { - "name": "beta_1", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Exponential decay rate for estimates of first moment vector in adam, should be in [0, 1). Only used when solver='adam'" - }, - { - "name": "beta_2", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Exponential decay rate for estimates of second moment vector in adam, should be in [0, 1). Only used when solver='adam'" - }, - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "1e-8", - "limitation": null, - "ignored": false, - "docstring": "Value for numerical stability in adam. Only used when solver='adam'" - }, - { - "name": "n_iter_no_change", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of epochs to not meet ``tol`` improvement. Only effective when solver='sgd' or 'adam' .. versionadded:: 0.20" - }, - { - "name": "max_fun", - "type": "int", - "hasDefault": true, - "default": "15000", - "limitation": null, - "ignored": false, - "docstring": "Only used when solver='lbfgs'. Maximum number of function calls. The solver iterates until convergence (determined by 'tol'), number of iterations reaches max_iter, or this number of function calls. Note that number of function calls will be greater than or equal to the number of iterations for the MLPRegressor. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict using the multi-layer perceptron model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\ny : ndarray of shape (n_samples, n_outputs)\n The predicted values." - }, - { - "name": "_validate_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Multi-layer Perceptron regressor.\n\nThis model optimizes the squared-loss using LBFGS or stochastic gradient\ndescent.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nhidden_layer_sizes : tuple, length = n_layers - 2, default=(100,)\n The ith element represents the number of neurons in the ith\n hidden layer.\n\nactivation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'\n Activation function for the hidden layer.\n\n - 'identity', no-op activation, useful to implement linear bottleneck,\n returns f(x) = x\n\n - 'logistic', the logistic sigmoid function,\n returns f(x) = 1 / (1 + exp(-x)).\n\n - 'tanh', the hyperbolic tan function,\n returns f(x) = tanh(x).\n\n - 'relu', the rectified linear unit function,\n returns f(x) = max(0, x)\n\nsolver : {'lbfgs', 'sgd', 'adam'}, default='adam'\n The solver for weight optimization.\n\n - 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n - 'sgd' refers to stochastic gradient descent.\n\n - 'adam' refers to a stochastic gradient-based optimizer proposed by\n Kingma, Diederik, and Jimmy Ba\n\n Note: The default solver 'adam' works pretty well on relatively\n large datasets (with thousands 
of training samples or more) in terms of\n both training time and validation score.\n For small datasets, however, 'lbfgs' can converge faster and perform\n better.\n\nalpha : float, default=0.0001\n L2 penalty (regularization term) parameter.\n\nbatch_size : int, default='auto'\n Size of minibatches for stochastic optimizers.\n If the solver is 'lbfgs', the classifier will not use minibatch.\n When set to \"auto\", `batch_size=min(200, n_samples)`\n\nlearning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'\n Learning rate schedule for weight updates.\n\n - 'constant' is a constant learning rate given by\n 'learning_rate_init'.\n\n - 'invscaling' gradually decreases the learning rate ``learning_rate_``\n at each time step 't' using an inverse scaling exponent of 'power_t'.\n effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n - 'adaptive' keeps the learning rate constant to\n 'learning_rate_init' as long as training loss keeps decreasing.\n Each time two consecutive epochs fail to decrease training loss by at\n least tol, or fail to increase validation score by at least tol if\n 'early_stopping' is on, the current learning rate is divided by 5.\n\n Only used when solver='sgd'.\n\nlearning_rate_init : double, default=0.001\n The initial learning rate used. It controls the step-size\n in updating the weights. Only used when solver='sgd' or 'adam'.\n\npower_t : double, default=0.5\n The exponent for inverse scaling learning rate.\n It is used in updating effective learning rate when the learning_rate\n is set to 'invscaling'. Only used when solver='sgd'.\n\nmax_iter : int, default=200\n Maximum number of iterations. The solver iterates until convergence\n (determined by 'tol') or this number of iterations. 
For stochastic\n solvers ('sgd', 'adam'), note that this determines the number of epochs\n (how many times each data point will be used), not the number of\n gradient steps.\n\nshuffle : bool, default=True\n Whether to shuffle samples in each iteration. Only used when\n solver='sgd' or 'adam'.\n\nrandom_state : int, RandomState instance, default=None\n Determines random number generation for weights and bias\n initialization, train-test split if early stopping is used, and batch\n sampling when solver='sgd' or 'adam'.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\ntol : float, default=1e-4\n Tolerance for the optimization. When the loss or score is not improving\n by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\n unless ``learning_rate`` is set to 'adaptive', convergence is\n considered to be reached and training stops.\n\nverbose : bool, default=False\n Whether to print progress messages to stdout.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous\n call to fit as initialization, otherwise, just erase the\n previous solution. See :term:`the Glossary `.\n\nmomentum : float, default=0.9\n Momentum for gradient descent update. Should be between 0 and 1. Only\n used when solver='sgd'.\n\nnesterovs_momentum : bool, default=True\n Whether to use Nesterov's momentum. Only used when solver='sgd' and\n momentum > 0.\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to true, it will automatically set\n aside 10% of training data as validation and terminate training when\n validation score is not improving by at least ``tol`` for\n ``n_iter_no_change`` consecutive epochs.\n Only effective when solver='sgd' or 'adam'\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. 
Must be between 0 and 1.\n Only used if early_stopping is True\n\nbeta_1 : float, default=0.9\n Exponential decay rate for estimates of first moment vector in adam,\n should be in [0, 1). Only used when solver='adam'\n\nbeta_2 : float, default=0.999\n Exponential decay rate for estimates of second moment vector in adam,\n should be in [0, 1). Only used when solver='adam'\n\nepsilon : float, default=1e-8\n Value for numerical stability in adam. Only used when solver='adam'\n\nn_iter_no_change : int, default=10\n Maximum number of epochs to not meet ``tol`` improvement.\n Only effective when solver='sgd' or 'adam'\n\n .. versionadded:: 0.20\n\nmax_fun : int, default=15000\n Only used when solver='lbfgs'. Maximum number of function calls.\n The solver iterates until convergence (determined by 'tol'), number\n of iterations reaches max_iter, or this number of function calls.\n Note that number of function calls will be greater than or equal to\n the number of iterations for the MLPRegressor.\n\n .. 
versionadded:: 0.22\n\nAttributes\n----------\nloss_ : float\n The current loss computed with the loss function.\n\nbest_loss_ : float\n The minimum loss reached by the solver throughout fitting.\n\nloss_curve_ : list of shape (`n_iter_`,)\n The ith element in the list represents the loss at the ith iteration.\n\nt_ : int\n The number of training samples seen by the solver during fitting.\n\ncoefs_ : list of shape (n_layers - 1,)\n The ith element in the list represents the weight matrix corresponding\n to layer i.\n\nintercepts_ : list of shape (n_layers - 1,)\n The ith element in the list represents the bias vector corresponding to\n layer i + 1.\n\nn_iter_ : int\n The number of iterations the solver has ran.\n\nn_layers_ : int\n Number of layers.\n\nn_outputs_ : int\n Number of outputs.\n\nout_activation_ : str\n Name of the output activation function.\n\nloss_curve_ : list of shape (n_iters,)\n Loss value evaluated at the end of each training step.\n\nt_ : int\n Mathematically equals `n_iters * X.shape[0]`, it means\n `time_step` and it is used by optimizer's learning rate scheduler.\n\nExamples\n--------\n>>> from sklearn.neural_network import MLPRegressor\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_regression(n_samples=200, random_state=1)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n... 
random_state=1)\n>>> regr = MLPRegressor(random_state=1, max_iter=500).fit(X_train, y_train)\n>>> regr.predict(X_test[:2])\narray([-0.9..., -7.1...])\n>>> regr.score(X_test, y_test)\n0.4...\n\nNotes\n-----\nMLPRegressor trains iteratively since at each time step\nthe partial derivatives of the loss function with respect to the model\nparameters are computed to update the parameters.\n\nIt can also have a regularization term added to the loss function\nthat shrinks model parameters to prevent overfitting.\n\nThis implementation works with data represented as dense and sparse numpy\narrays of floating point values.\n\nReferences\n----------\nHinton, Geoffrey E.\n \"Connectionist learning procedures.\" Artificial intelligence 40.1\n (1989): 185-234.\n\nGlorot, Xavier, and Yoshua Bengio. \"Understanding the difficulty of\n training deep feedforward neural networks.\" International Conference\n on Artificial Intelligence and Statistics. 2010.\n\nHe, Kaiming, et al. \"Delving deep into rectifiers: Surpassing human-level\n performance on imagenet classification.\" arXiv preprint\n arXiv:1502.01852 (2015).\n\nKingma, Diederik, and Jimmy Ba. \"Adam: A method for stochastic\n optimization.\" arXiv preprint arXiv:1412.6980 (2014)." - } - ], - "functions": [ - { - "name": "_pack", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Pack the parameters into a single vector." 
- } - ] - }, - { - "name": "sklearn.neural_network._rbm", - "imports": [ - "import time", - "import numpy as np", - "import scipy.sparse as sp", - "from scipy.special import expit", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_array", - "from utils import check_random_state", - "from utils import gen_even_slices", - "from utils.extmath import safe_sparse_dot", - "from utils.extmath import log_logistic", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "BernoulliRBM", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_components", - "type": "int", - "hasDefault": true, - "default": "256", - "limitation": null, - "ignored": false, - "docstring": "Number of binary hidden units." - }, - { - "name": "learning_rate", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The learning rate for weight updates. It is *highly* recommended to tune this hyper-parameter. Reasonable values are in the 10**[0., -3.] range." - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of examples per minibatch." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations/sweeps over the training dataset to perform during training." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The verbosity level. The default, zero, means silent mode." 
- }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for: - Gibbs sampling from visible and hidden layers. - Initializing components, sampling from layers during fit. - Corrupting the data when scoring samples. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to be transformed." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the hidden layer activation probabilities, P(h=1|v=X).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to be transformed.\n\nReturns\n-------\nh : ndarray of shape (n_samples, n_components)\n Latent representations of the data." - }, - { - "name": "_mean_hiddens", - "decorators": [], - "parameters": [ - { - "name": "v", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values of the visible layer." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the probabilities P(h=1|v).\n\nParameters\n----------\nv : ndarray of shape (n_samples, n_features)\n Values of the visible layer.\n\nReturns\n-------\nh : ndarray of shape (n_samples, n_components)\n Corresponding mean field values for the hidden layer." - }, - { - "name": "_sample_hiddens", - "decorators": [], - "parameters": [ - { - "name": "v", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values of the visible layer to sample from." 
- }, - { - "name": "rng", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Random number generator to use." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sample from the distribution P(h|v).\n\nParameters\n----------\nv : ndarray of shape (n_samples, n_features)\n Values of the visible layer to sample from.\n\nrng : RandomState instance\n Random number generator to use.\n\nReturns\n-------\nh : ndarray of shape (n_samples, n_components)\n Values of the hidden layer." - }, - { - "name": "_sample_visibles", - "decorators": [], - "parameters": [ - { - "name": "h", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values of the hidden layer to sample from." - }, - { - "name": "rng", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Random number generator to use." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Sample from the distribution P(v|h).\n\nParameters\n----------\nh : ndarray of shape (n_samples, n_components)\n Values of the hidden layer to sample from.\n\nrng : RandomState instance\n Random number generator to use.\n\nReturns\n-------\nv : ndarray of shape (n_samples, n_features)\n Values of the visible layer." - }, - { - "name": "_free_energy", - "decorators": [], - "parameters": [ - { - "name": "v", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values of the visible layer." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes the free energy F(v) = - log sum_h exp(-E(v,h)).\n\nParameters\n----------\nv : ndarray of shape (n_samples, n_features)\n Values of the visible layer.\n\nReturns\n-------\nfree_energy : ndarray of shape (n_samples,)\n The value of the free energy." 
- }, - { - "name": "gibbs", - "decorators": [], - "parameters": [ - { - "name": "v", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values of the visible layer to start from." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform one Gibbs sampling step.\n\nParameters\n----------\nv : ndarray of shape (n_samples, n_features)\n Values of the visible layer to start from.\n\nReturns\n-------\nv_new : ndarray of shape (n_samples, n_features)\n Values of the visible layer after one Gibbs step." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to the data X which should contain a partial\nsegment of the data.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Training data.\n\nReturns\n-------\nself : BernoulliRBM\n The fitted model." - }, - { - "name": "_fit", - "decorators": [], - "parameters": [ - { - "name": "v_pos", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to use for training." - }, - { - "name": "rng", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Random number generator to use for sampling." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inner fit for one mini-batch.\n\nAdjust the parameters to maximize the likelihood of v using\nStochastic Maximum Likelihood (SML).\n\nParameters\n----------\nv_pos : ndarray of shape (n_samples, n_features)\n The data to use for training.\n\nrng : RandomState instance\n Random number generator to use for sampling." 
- }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values of the visible layer. Must be all-boolean (not checked)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the pseudo-likelihood of X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Values of the visible layer. Must be all-boolean (not checked).\n\nReturns\n-------\npseudo_likelihood : ndarray of shape (n_samples,)\n Value of the pseudo-likelihood (proxy for likelihood).\n\nNotes\n-----\nThis method is not deterministic: it computes a quantity called the\nfree energy on X, then on a randomly corrupted version of X, and\nreturns the log of the logistic function of the difference." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training data." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model to the data X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\nReturns\n-------\nself : BernoulliRBM\n The fitted model." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Bernoulli Restricted Boltzmann Machine (RBM).\n\nA Restricted Boltzmann Machine with binary visible units and\nbinary hidden units. 
Parameters are estimated using Stochastic Maximum\nLikelihood (SML), also known as Persistent Contrastive Divergence (PCD)\n[2].\n\nThe time complexity of this implementation is ``O(d ** 2)`` assuming\nd ~ n_features ~ n_components.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=256\n Number of binary hidden units.\n\nlearning_rate : float, default=0.1\n The learning rate for weight updates. It is *highly* recommended\n to tune this hyper-parameter. Reasonable values are in the\n 10**[0., -3.] range.\n\nbatch_size : int, default=10\n Number of examples per minibatch.\n\nn_iter : int, default=10\n Number of iterations/sweeps over the training dataset to perform\n during training.\n\nverbose : int, default=0\n The verbosity level. The default, zero, means silent mode.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for:\n\n - Gibbs sampling from visible and hidden layers.\n\n - Initializing components, sampling from layers during fit.\n\n - Corrupting the data when scoring samples.\n\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nintercept_hidden_ : array-like of shape (n_components,)\n Biases of the hidden units.\n\nintercept_visible_ : array-like of shape (n_features,)\n Biases of the visible units.\n\ncomponents_ : array-like of shape (n_components, n_features)\n Weight matrix, where n_features in the number of\n visible units and n_components is the number of hidden units.\n\nh_samples_ : array-like of shape (batch_size, n_components)\n Hidden Activation sampled from the model distribution,\n where batch_size in the number of examples per minibatch and\n n_components is the number of hidden units.\n\nExamples\n--------\n\n>>> import numpy as np\n>>> from sklearn.neural_network import BernoulliRBM\n>>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])\n>>> model = 
BernoulliRBM(n_components=2)\n>>> model.fit(X)\nBernoulliRBM(n_components=2)\n\nReferences\n----------\n\n[1] Hinton, G. E., Osindero, S. and Teh, Y. A fast learning algorithm for\n deep belief nets. Neural Computation 18, pp 1527-1554.\n https://www.cs.toronto.edu/~hinton/absps/fastnc.pdf\n\n[2] Tieleman, T. Training Restricted Boltzmann Machines using\n Approximations to the Likelihood Gradient. International Conference\n on Machine Learning (ICML) 2008" - } - ], - "functions": [] - }, - { - "name": "sklearn.neural_network._stochastic_optimizers", - "imports": [ - "import numpy as np" - ], - "classes": [ - { - "name": "BaseOptimizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "params", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The concatenated list containing coefs_ and intercepts_ in MLP model. Used for initializing velocities and updating params" - }, - { - "name": "learning_rate_init", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The initial learning rate used. It controls the step-size in updating the weights" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "update_params", - "decorators": [], - "parameters": [ - { - "name": "grads", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Containing gradients with respect to coefs_ and intercepts_ in MLP model. So length should be aligned with params" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Update parameters with given gradients\n\nParameters\n----------\ngrads : list, length = len(params)\n Containing gradients with respect to coefs_ and intercepts_ in MLP\n model. 
So length should be aligned with params" - }, - { - "name": "iteration_ends", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform update to learning rate and potentially other states at the\nend of an iteration" - }, - { - "name": "trigger_stopping", - "decorators": [], - "parameters": [ - { - "name": "msg", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Message passed in for verbose output" - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Print message to stdin if True" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decides whether it is time to stop training\n\nParameters\n----------\nmsg : str\n Message passed in for verbose output\n\nverbose : bool\n Print message to stdin if True\n\nReturns\n-------\nis_stopping : bool\n True if training needs to stop" - } - ], - "docstring": "Base (Stochastic) gradient descent optimizer\n\nParameters\n----------\nparams : list, length = len(coefs_) + len(intercepts_)\n The concatenated list containing coefs_ and intercepts_ in MLP model.\n Used for initializing velocities and updating params\n\nlearning_rate_init : float, default=0.1\n The initial learning rate used. It controls the step-size in updating\n the weights\n\nAttributes\n----------\nlearning_rate : float\n the current learning rate" - }, - { - "name": "SGDOptimizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "params", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The concatenated list containing coefs_ and intercepts_ in MLP model. 
Used for initializing velocities and updating params" - }, - { - "name": "learning_rate_init", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The initial learning rate used. It controls the step-size in updating the weights" - }, - { - "name": "lr_schedule", - "type": "Literal['constant', 'adaptive', 'invscaling']", - "hasDefault": true, - "default": "'constant'", - "limitation": null, - "ignored": false, - "docstring": "Learning rate schedule for weight updates. -'constant', is a constant learning rate given by 'learning_rate_init'. -'invscaling' gradually decreases the learning rate 'learning_rate_' at each time step 't' using an inverse scaling exponent of 'power_t'. learning_rate_ = learning_rate_init / pow(t, power_t) -'adaptive', keeps the learning rate constant to 'learning_rate_init' as long as the training keeps decreasing. Each time 2 consecutive epochs fail to decrease the training loss by tol, or fail to increase validation score by tol if 'early_stopping' is on, the current learning rate is divided by 5." - }, - { - "name": "momentum", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Value of momentum used, must be larger than or equal to 0" - }, - { - "name": "nesterov", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use nesterov's momentum or not. Use nesterov's if True" - }, - { - "name": "power_t", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Power of time step 't' in inverse scaling. See `lr_schedule` for more details." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "iteration_ends", - "decorators": [], - "parameters": [ - { - "name": "time_step", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "number of training samples trained on so far, used to update learning rate for 'invscaling'" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform updates to learning rate and potential other states at the\nend of an iteration\n\nParameters\n----------\ntime_step : int\n number of training samples trained on so far, used to update\n learning rate for 'invscaling'" - }, - { - "name": "trigger_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_updates", - "decorators": [], - "parameters": [ - { - "name": "grads", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Containing gradients with respect to coefs_ and intercepts_ in MLP model. So length should be aligned with params" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get the values used to update params with given gradients\n\nParameters\n----------\ngrads : list, length = len(coefs_) + len(intercepts_)\n Containing gradients with respect to coefs_ and intercepts_ in MLP\n model. So length should be aligned with params\n\nReturns\n-------\nupdates : list, length = len(grads)\n The values to add to params" - } - ], - "docstring": "Stochastic gradient descent optimizer with momentum\n\nParameters\n----------\nparams : list, length = len(coefs_) + len(intercepts_)\n The concatenated list containing coefs_ and intercepts_ in MLP model.\n Used for initializing velocities and updating params\n\nlearning_rate_init : float, default=0.1\n The initial learning rate used. 
It controls the step-size in updating\n the weights\n\nlr_schedule : {'constant', 'adaptive', 'invscaling'}, default='constant'\n Learning rate schedule for weight updates.\n\n -'constant', is a constant learning rate given by\n 'learning_rate_init'.\n\n -'invscaling' gradually decreases the learning rate 'learning_rate_' at\n each time step 't' using an inverse scaling exponent of 'power_t'.\n learning_rate_ = learning_rate_init / pow(t, power_t)\n\n -'adaptive', keeps the learning rate constant to\n 'learning_rate_init' as long as the training keeps decreasing.\n Each time 2 consecutive epochs fail to decrease the training loss by\n tol, or fail to increase validation score by tol if 'early_stopping'\n is on, the current learning rate is divided by 5.\n\nmomentum : float, default=0.9\n Value of momentum used, must be larger than or equal to 0\n\nnesterov : bool, default=True\n Whether to use nesterov's momentum or not. Use nesterov's if True\n\npower_t : float, default=0.5\n Power of time step 't' in inverse scaling. See `lr_schedule` for\n more details.\n\nAttributes\n----------\nlearning_rate : float\n the current learning rate\n\nvelocities : list, length = len(params)\n velocities that are used to update params" - }, - { - "name": "AdamOptimizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "params", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The concatenated list containing coefs_ and intercepts_ in MLP model. Used for initializing velocities and updating params" - }, - { - "name": "learning_rate_init", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The initial learning rate used. 
It controls the step-size in updating the weights" - }, - { - "name": "beta_1", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Exponential decay rate for estimates of first moment vector, should be in [0, 1)" - }, - { - "name": "beta_2", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Exponential decay rate for estimates of second moment vector, should be in [0, 1)" - }, - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "1e-8", - "limitation": null, - "ignored": false, - "docstring": "Value for numerical stability" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_updates", - "decorators": [], - "parameters": [ - { - "name": "grads", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Containing gradients with respect to coefs_ and intercepts_ in MLP model. So length should be aligned with params" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get the values used to update params with given gradients\n\nParameters\n----------\ngrads : list, length = len(coefs_) + len(intercepts_)\n Containing gradients with respect to coefs_ and intercepts_ in MLP\n model. So length should be aligned with params\n\nReturns\n-------\nupdates : list, length = len(grads)\n The values to add to params" - } - ], - "docstring": "Stochastic gradient descent optimizer with Adam\n\nNote: All default values are from the original Adam paper\n\nParameters\n----------\nparams : list, length = len(coefs_) + len(intercepts_)\n The concatenated list containing coefs_ and intercepts_ in MLP model.\n Used for initializing velocities and updating params\n\nlearning_rate_init : float, default=0.001\n The initial learning rate used. 
It controls the step-size in updating\n the weights\n\nbeta_1 : float, default=0.9\n Exponential decay rate for estimates of first moment vector, should be\n in [0, 1)\n\nbeta_2 : float, default=0.999\n Exponential decay rate for estimates of second moment vector, should be\n in [0, 1)\n\nepsilon : float, default=1e-8\n Value for numerical stability\n\nAttributes\n----------\nlearning_rate : float\n The current learning rate\n\nt : int\n Timestep\n\nms : list, length = len(params)\n First moment vectors\n\nvs : list, length = len(params)\n Second moment vectors\n\nReferences\n----------\nKingma, Diederik, and Jimmy Ba.\n\"Adam: A method for stochastic optimization.\"\narXiv preprint arXiv:1412.6980 (2014)." - } - ], - "functions": [] - }, - { - "name": "sklearn.neural_network", - "imports": [ - "from _rbm import BernoulliRBM", - "from _multilayer_perceptron import MLPClassifier", - "from _multilayer_perceptron import MLPRegressor" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.neural_network.tests.test_base", - "imports": [ - "import pytest", - "import numpy as np", - "from sklearn.neural_network._base import binary_log_loss", - "from sklearn.neural_network._base import log_loss" - ], - "classes": [], - "functions": [ - { - "name": "test_binary_log_loss_1_prob_finite", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_log_loss_1_prob_finite", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neural_network.tests.test_mlp", - "imports": [ - "import pytest", - "import sys", - "import warnings", - "import re", - "import numpy as np", - "from numpy.testing import assert_almost_equal", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_allclose", - "from sklearn.datasets import load_digits", - "from sklearn.datasets import load_iris", - 
"from sklearn.datasets import make_regression", - "from sklearn.datasets import make_multilabel_classification", - "from sklearn.exceptions import ConvergenceWarning", - "from io import StringIO", - "from sklearn.metrics import roc_auc_score", - "from sklearn.neural_network import MLPClassifier", - "from sklearn.neural_network import MLPRegressor", - "from sklearn.preprocessing import LabelBinarizer", - "from sklearn.preprocessing import MinMaxScaler", - "from sklearn.preprocessing import scale", - "from scipy.sparse import csr_matrix", - "from sklearn.utils._testing import ignore_warnings" - ], - "classes": [], - "functions": [ - { - "name": "test_alpha", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gradient", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lbfgs_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lbfgs_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lbfgs_classification_maxfun", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lbfgs_regression_maxfun", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_learning_rate_warmstart", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_multioutput_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_classes_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_unseen_classes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_params_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_proba_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_proba_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_proba_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_matrices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tolerance", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_verbose_sgd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_adaptive_learning_rate", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warm_start_full_iteration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_iter_no_change", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_iter_no_change_inf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping_stratified", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mlp_classifier_dtypes_casting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mlp_regressor_dtypes_casting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mlp_param_dtypes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neural_network.tests.test_rbm", - "imports": [ - "import sys", - "import re", - "import pytest", - "import numpy as np", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import 
csr_matrix", - "from scipy.sparse import lil_matrix", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.datasets import load_digits", - "from io import StringIO", - "from sklearn.neural_network import BernoulliRBM", - "from sklearn.utils.validation import assert_all_finite" - ], - "classes": [], - "functions": [ - { - "name": "test_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_small_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_small_sparse_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_hiddens", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_gibbs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_gibbs_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gibbs_smoke", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rbm_verbose", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_and_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transformer_dtypes_casting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_convergence_dtype_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.neural_network.tests.test_stochastic_optimizers", - "imports": [ - "import numpy as np", - "from sklearn.neural_network._stochastic_optimizers import BaseOptimizer", - "from sklearn.neural_network._stochastic_optimizers import SGDOptimizer", - "from sklearn.neural_network._stochastic_optimizers import AdamOptimizer", - "from sklearn.utils._testing import assert_array_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_base_optimizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_optimizer_no_momentum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_optimizer_momentum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_optimizer_trigger_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sgd_optimizer_nesterovs_momentum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_adam_optimizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { 
- "name": "sklearn.neural_network.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.preprocessing.setup", - "imports": [ - "import os", - "import numpy", - "from numpy.distutils.misc_util import Configuration" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.preprocessing._data", - "imports": [ - "from itertools import chain", - "from itertools import combinations", - "import warnings", - "from itertools import combinations_with_replacement as combinations_w_r", - "import numpy as np", - "from scipy import sparse", - "from scipy import stats", - "from scipy import optimize", - "from scipy.special import boxcox", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_array", - "from utils.deprecation import deprecated", - "from utils.extmath import row_norms", - "from utils.extmath import _incremental_mean_and_var", - "from utils.extmath import _incremental_weighted_mean_and_var", - "from utils.sparsefuncs_fast import inplace_csr_row_normalize_l1", - "from utils.sparsefuncs_fast import inplace_csr_row_normalize_l2", - "from utils.sparsefuncs import inplace_column_scale", - "from utils.sparsefuncs import mean_variance_axis", - "from utils.sparsefuncs import incr_mean_variance_axis", - "from utils.sparsefuncs import min_max_axis", - "from utils.validation import check_is_fitted", - "from utils.validation import check_random_state", - "from utils.validation import _check_sample_weight", - "from utils.validation import FLOAT_DTYPES", - "from utils.validation import _deprecate_positional_args", - "from _csr_polynomial_expansion import _csr_polynomial_expansion", - "from _encoders import OneHotEncoder" - ], - "classes": [ - { - "name": "MinMaxScaler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": 
"__init__", - "decorators": [], - "parameters": [ - { - "name": "feature_range", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Desired range of transformed data." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array)." - }, - { - "name": "clip: bool", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Set to True to clip transformed values of held-out data to provided `feature range`. .. versionadded:: 0.24" - }, - { - "name": "default=False", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Set to True to clip transformed values of held-out data to provided `feature range`. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_reset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reset internal data-dependent state of the scaler, if necessary.\n\n__init__ parameters are not touched." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to compute the per-feature minimum and maximum used for later scaling along the features axis." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the minimum and maximum to be used for later scaling.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data used to compute the per-feature minimum and maximum\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted scaler." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to compute the mean and standard deviation used for later scaling along the features axis." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Online computation of min and max on X for later scaling.\n\nAll of X is processed as a single batch. This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted scaler." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data that will be transformed." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Scale features of X according to feature_range.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data that will be transformed.\n\nReturns\n-------\nXt : ndarray of shape (n_samples, n_features)\n Transformed data." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data that will be transformed. It cannot be sparse." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Undo the scaling of X according to feature_range.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data that will be transformed. It cannot be sparse.\n\nReturns\n-------\nXt : ndarray of shape (n_samples, n_features)\n Transformed data." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, e.g. 
between\nzero and one.\n\nThe transformation is given by::\n\n X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n X_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfeature_range : tuple (min, max), default=(0, 1)\n Desired range of transformed data.\n\ncopy : bool, default=True\n Set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array).\n\nclip: bool, default=False\n Set to True to clip transformed values of held-out data to\n provided `feature range`.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nmin_ : ndarray of shape (n_features,)\n Per feature adjustment for minimum. Equivalent to\n ``min - X.min(axis=0) * self.scale_``\n\nscale_ : ndarray of shape (n_features,)\n Per feature relative scaling of the data. Equivalent to\n ``(max - min) / (X.max(axis=0) - X.min(axis=0))``\n\n .. versionadded:: 0.17\n *scale_* attribute.\n\ndata_min_ : ndarray of shape (n_features,)\n Per feature minimum seen in the data\n\n .. versionadded:: 0.17\n *data_min_*\n\ndata_max_ : ndarray of shape (n_features,)\n Per feature maximum seen in the data\n\n .. versionadded:: 0.17\n *data_max_*\n\ndata_range_ : ndarray of shape (n_features,)\n Per feature range ``(data_max_ - data_min_)`` seen in the data\n\n .. versionadded:: 0.17\n *data_range_*\n\nn_samples_seen_ : int\n The number of samples processed by the estimator.\n It will be reset on new calls to fit, but increments across\n ``partial_fit`` calls.\n\nExamples\n--------\n>>> from sklearn.preprocessing import MinMaxScaler\n>>> data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]\n>>> scaler = MinMaxScaler()\n>>> print(scaler.fit(data))\nMinMaxScaler()\n>>> print(scaler.data_max_)\n[ 1. 18.]\n>>> print(scaler.transform(data))\n[[0. 0. ]\n [0.25 0.25]\n [0.5 0.5 ]\n [1. 1. 
]]\n>>> print(scaler.transform([[2, 2]]))\n[[1.5 0. ]]\n\nSee Also\n--------\nminmax_scale : Equivalent function without the estimator API.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`." - }, - { - "name": "StandardScaler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, try to avoid a copy and do inplace scaling instead. This is not guaranteed to always work inplace; e.g. if the data is not a NumPy array or scipy.sparse CSR matrix, a copy may still be returned." - }, - { - "name": "with_mean", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, center the data before scaling. This does not work (and will raise an exception) when attempted on sparse matrices, because centering them entails building a dense matrix which in common use cases is likely to be too large to fit in memory." - }, - { - "name": "with_std", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, scale the data to unit variance (or equivalently, unit standard deviation)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_reset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reset internal data-dependent state of the scaler, if necessary.\n\n__init__ parameters are not touched." 
- }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to compute the mean and standard deviation used for later scaling along the features axis." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample. .. versionadded:: 0.24 parameter *sample_weight* support to StandardScaler." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the mean and std to be used for later scaling.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample.\n\n .. versionadded:: 0.24\n parameter *sample_weight* support to StandardScaler.\n\nReturns\n-------\nself : object\n Fitted scaler." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to compute the mean and standard deviation used for later scaling along the features axis." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Individual weights for each sample. 
.. versionadded:: 0.24 parameter *sample_weight* support to StandardScaler." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Online computation of mean and std on X for later scaling.\n\nAll of X is processed as a single batch. This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.\n\nThe algorithm for incremental mean and std is given in Equation 1.5a,b\nin Chan, Tony F., Gene H. Golub, and Randall J. LeVeque. \"Algorithms\nfor computing the sample variance: Analysis and recommendations.\"\nThe American Statistician 37.3 (1983): 242-247:\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample.\n\n .. versionadded:: 0.24\n parameter *sample_weight* support to StandardScaler.\n\nReturns\n-------\nself : object\n Fitted scaler." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the features axis." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Copy the input X or not." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform standardization by centering and scaling\n\nParameters\n----------\nX : {array-like, sparse matrix of shape (n_samples, n_features)\n The data used to scale along the features axis.\ncopy : bool, default=None\n Copy the input X or not.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array." 
- }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the features axis." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Copy the input X or not." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Scale back the data to the original representation\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis.\ncopy : bool, default=None\n Copy the input X or not.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Standardize features by removing the mean and scaling to unit variance\n\nThe standard score of a sample `x` is calculated as:\n\n z = (x - u) / s\n\nwhere `u` is the mean of the training samples or zero if `with_mean=False`,\nand `s` is the standard deviation of the training samples or one if\n`with_std=False`.\n\nCentering and scaling happen independently on each feature by computing\nthe relevant statistics on the samples in the training set. Mean and\nstandard deviation are then stored to be used on later data using\n:meth:`transform`.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators: they might behave badly if the\nindividual features do not more or less look like standard normally\ndistributed data (e.g. 
Gaussian with 0 mean and unit variance).\n\nFor instance many elements used in the objective function of\na learning algorithm (such as the RBF kernel of Support Vector\nMachines or the L1 and L2 regularizers of linear models) assume that\nall features are centered around 0 and have variance in the same\norder. If a feature has a variance that is orders of magnitude larger\nthat others, it might dominate the objective function and make the\nestimator unable to learn from other features correctly as expected.\n\nThis scaler can also be applied to sparse CSR or CSC matrices by passing\n`with_mean=False` to avoid breaking the sparsity structure of the data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncopy : bool, default=True\n If False, try to avoid a copy and do inplace scaling instead.\n This is not guaranteed to always work inplace; e.g. if the data is\n not a NumPy array or scipy.sparse CSR matrix, a copy may still be\n returned.\n\nwith_mean : bool, default=True\n If True, center the data before scaling.\n This does not work (and will raise an exception) when attempted on\n sparse matrices, because centering them entails building a dense\n matrix which in common use cases is likely to be too large to fit in\n memory.\n\nwith_std : bool, default=True\n If True, scale the data to unit variance (or equivalently,\n unit standard deviation).\n\nAttributes\n----------\nscale_ : ndarray of shape (n_features,) or None\n Per feature relative scaling of the data to achieve zero mean and unit\n variance. Generally this is calculated using `np.sqrt(var_)`. If a\n variance is zero, we can't achieve unit variance, and the data is left\n as-is, giving a scaling factor of 1. `scale_` is equal to `None`\n when `with_std=False`.\n\n .. 
versionadded:: 0.17\n *scale_*\n\nmean_ : ndarray of shape (n_features,) or None\n The mean value for each feature in the training set.\n Equal to ``None`` when ``with_mean=False``.\n\nvar_ : ndarray of shape (n_features,) or None\n The variance for each feature in the training set. Used to compute\n `scale_`. Equal to ``None`` when ``with_std=False``.\n\nn_samples_seen_ : int or ndarray of shape (n_features,)\n The number of samples processed by the estimator for each feature.\n If there are no missing samples, the ``n_samples_seen`` will be an\n integer, otherwise it will be an array of dtype int. If\n `sample_weights` are used it will be a float (if no missing data)\n or an array of dtype float that sums the weights seen so far.\n Will be reset on new calls to fit, but increments across\n ``partial_fit`` calls.\n\nExamples\n--------\n>>> from sklearn.preprocessing import StandardScaler\n>>> data = [[0, 0], [0, 0], [1, 1], [1, 1]]\n>>> scaler = StandardScaler()\n>>> print(scaler.fit(data))\nStandardScaler()\n>>> print(scaler.mean_)\n[0.5 0.5]\n>>> print(scaler.transform(data))\n[[-1. -1.]\n [-1. -1.]\n [ 1. 1.]\n [ 1. 1.]]\n>>> print(scaler.transform([[2, 2]]))\n[[3. 3.]]\n\nSee Also\n--------\nscale : Equivalent function without the estimator API.\n\n:class:`~sklearn.decomposition.PCA` : Further removes the linear\n correlation across features with 'whiten=True'.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nWe use a biased estimator for the standard deviation, equivalent to\n`numpy.std(x, ddof=0)`. Note that the choice of `ddof` is unlikely to\naffect model performance.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`." 
- }, - { - "name": "MaxAbsScaler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace scaling and avoid a copy (if the input is already a numpy array)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_reset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Reset internal data-dependent state of the scaler, if necessary.\n\n__init__ parameters are not touched." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to compute the per-feature minimum and maximum used for later scaling along the features axis." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the maximum absolute value to be used for later scaling.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the per-feature minimum and maximum\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted scaler." - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to compute the mean and standard deviation used for later scaling along the features axis." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Online computation of max absolute value of X for later scaling.\n\nAll of X is processed as a single batch. This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted scaler." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data that should be scaled." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Scale the data\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data that should be scaled.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data that should be transformed back." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Scale back the data to the original representation\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data that should be transformed back.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Scale each feature by its maximum absolute value.\n\nThis estimator scales and translates each feature individually such\nthat the maximal absolute value of each feature in the\ntraining set will be 1.0. It does not shift/center the data, and\nthus does not destroy any sparsity.\n\nThis scaler can also be applied to sparse CSR or CSC matrices.\n\n.. versionadded:: 0.17\n\nParameters\n----------\ncopy : bool, default=True\n Set to False to perform inplace scaling and avoid a copy (if the input\n is already a numpy array).\n\nAttributes\n----------\nscale_ : ndarray of shape (n_features,)\n Per feature relative scaling of the data.\n\n .. versionadded:: 0.17\n *scale_* attribute.\n\nmax_abs_ : ndarray of shape (n_features,)\n Per feature maximum absolute value.\n\nn_samples_seen_ : int\n The number of samples processed by the estimator. Will be reset on\n new calls to fit, but increments across ``partial_fit`` calls.\n\nExamples\n--------\n>>> from sklearn.preprocessing import MaxAbsScaler\n>>> X = [[ 1., -1., 2.],\n... [ 2., 0., 0.],\n... [ 0., 1., -1.]]\n>>> transformer = MaxAbsScaler().fit(X)\n>>> transformer\nMaxAbsScaler()\n>>> transformer.transform(X)\narray([[ 0.5, -1. , 1. ],\n [ 1. , 0. , 0. ],\n [ 0. , 1. , -0.5]])\n\nSee Also\n--------\nmaxabs_scale : Equivalent function without the estimator API.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`." 
- }, - { - "name": "RobustScaler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "with_centering", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, center the data before scaling. This will cause ``transform`` to raise an exception when attempted on sparse matrices, because centering them entails building a dense matrix which in common use cases is likely to be too large to fit in memory." - }, - { - "name": "with_scaling", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, scale the data to interquartile range." - }, - { - "name": "quantile_range", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Quantile range used to calculate ``scale_``. .. versionadded:: 0.18" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If False, try to avoid a copy and do inplace scaling instead. This is not guaranteed to always work inplace; e.g. if the data is not a NumPy array or scipy.sparse CSR matrix, a copy may still be returned." - }, - { - "name": "unit_variance", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, scale data so that normally distributed features have a variance of 1. In general, if the difference between the x-values of ``q_max`` and ``q_min`` for a standard normal distribution is greater than 1, the dataset will be scaled down. If less than 1, the dataset will be scaled up. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to compute the median and quantiles used for later scaling along the features axis." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the median and quantiles to be used for scaling.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the median and quantiles\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted scaler." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the specified axis." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Center and scale the data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the specified axis.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The rescaled data to be transformed back." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Scale back the data to the original representation\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The rescaled data to be transformed back.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Scale features using statistics that are robust to outliers.\n\nThis Scaler removes the median and scales the data according to\nthe quantile range (defaults to IQR: Interquartile Range).\nThe IQR is the range between the 1st quartile (25th quantile)\nand the 3rd quartile (75th quantile).\n\nCentering and scaling happen independently on each feature by\ncomputing the relevant statistics on the samples in the training\nset. Median and interquartile range are then stored to be used on\nlater data using the ``transform`` method.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators. Typically this is done by removing the mean\nand scaling to unit variance. However, outliers can often influence the\nsample mean / variance in a negative way. In such cases, the median and\nthe interquartile range often give better results.\n\n.. 
versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nwith_centering : bool, default=True\n If True, center the data before scaling.\n This will cause ``transform`` to raise an exception when attempted on\n sparse matrices, because centering them entails building a dense\n matrix which in common use cases is likely to be too large to fit in\n memory.\n\nwith_scaling : bool, default=True\n If True, scale the data to interquartile range.\n\nquantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0, default=(25.0, 75.0), == (1st quantile, 3rd quantile), == IQR\n Quantile range used to calculate ``scale_``.\n\n .. versionadded:: 0.18\n\ncopy : bool, default=True\n If False, try to avoid a copy and do inplace scaling instead.\n This is not guaranteed to always work inplace; e.g. if the data is\n not a NumPy array or scipy.sparse CSR matrix, a copy may still be\n returned.\n\nunit_variance : bool, default=False\n If True, scale data so that normally distributed features have a\n variance of 1. In general, if the difference between the x-values of\n ``q_max`` and ``q_min`` for a standard normal distribution is greater\n than 1, the dataset will be scaled down. If less than 1, the dataset\n will be scaled up.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncenter_ : array of floats\n The median value for each feature in the training set.\n\nscale_ : array of floats\n The (scaled) interquartile range for each feature in the training set.\n\n .. versionadded:: 0.17\n *scale_* attribute.\n\nExamples\n--------\n>>> from sklearn.preprocessing import RobustScaler\n>>> X = [[ 1., -2., 2.],\n... [ -2., 1., 3.],\n... [ 4., 1., -2.]]\n>>> transformer = RobustScaler().fit(X)\n>>> transformer\nRobustScaler()\n>>> transformer.transform(X)\narray([[ 0. , -2. , 0. ],\n [-1. , 0. , 0.4],\n [ 1. , 0. 
, -1.6]])\n\nSee Also\n--------\nrobust_scale : Equivalent function without the estimator API.\n\n:class:`~sklearn.decomposition.PCA`\n Further removes the linear correlation across features with\n 'whiten=True'.\n\nNotes\n-----\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\nhttps://en.wikipedia.org/wiki/Median\nhttps://en.wikipedia.org/wiki/Interquartile_range" - }, - { - "name": "PolynomialFeatures", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The degree of the polynomial features." - }, - { - "name": "interaction_only", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, only interaction features are produced: features that are products of at most ``degree`` *distinct* input features (so not ``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.)." - }, - { - "name": "include_bias", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True (default), then include a bias column, the feature in which all polynomial powers are zero (i.e. a column of ones - acts as an intercept term in a linear model)." - }, - { - "name": "order", - "type": "Literal['C', 'F']", - "hasDefault": true, - "default": "'C'", - "limitation": null, - "ignored": false, - "docstring": "Order of output array in the dense case. 'F' order is faster to compute, but may slow down subsequent estimators. .. 
versionadded:: 0.21" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_combinations", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "powers_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_feature_names", - "decorators": [], - "parameters": [ - { - "name": "input_features", - "type": "List[str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "String names for input features if available. By default, \"x0\", \"x1\", ... \"xn_features\" is used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return feature names for output features\n\nParameters\n----------\ninput_features : list of str of shape (n_features,), default=None\n String names for input features if available. By default,\n \"x0\", \"x1\", ... \"xn_features\" is used.\n\nReturns\n-------\noutput_feature_names : list of str of shape (n_output_features,)" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute number of output features.\n\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer." 
- }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to transform, row by row. Prefer CSR over CSC for sparse input (for speed), but CSC is required if the degree is 4 or higher. If the degree is less than 4 and the input format is CSC, it will be converted to CSR, have its polynomial features generated, then converted back to CSC. If the degree is 2 or 3, the method described in \"Leveraging Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices Using K-Simplex Numbers\" by Andrew Nystrom and John Hughes is used, which is much faster than the method used on CSC input. For this reason, a CSC input will be converted to CSR, and the output will be converted back to CSC prior to being returned, hence the preference of CSR." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform data to polynomial features\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to transform, row by row.\n\n Prefer CSR over CSC for sparse input (for speed), but CSC is\n required if the degree is 4 or higher. If the degree is less than\n 4 and the input format is CSC, it will be converted to CSR, have\n its polynomial features generated, then converted back to CSC.\n\n If the degree is 2 or 3, the method described in \"Leveraging\n Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices\n Using K-Simplex Numbers\" by Andrew Nystrom and John Hughes is\n used, which is much faster than the method used on CSC input. 
For\n this reason, a CSC input will be converted to CSR, and the output\n will be converted back to CSC prior to being returned, hence the\n preference of CSR.\n\nReturns\n-------\nXP : {ndarray, sparse matrix} of shape (n_samples, NP)\n The matrix of features, where NP is the number of polynomial\n features generated from the combination of inputs. If a sparse\n matrix is provided, it will be converted into a sparse\n ``csr_matrix``." - } - ], - "docstring": "Generate polynomial and interaction features.\n\nGenerate a new feature matrix consisting of all polynomial combinations\nof the features with degree less than or equal to the specified degree.\nFor example, if an input sample is two dimensional and of the form\n[a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].\n\nParameters\n----------\ndegree : int, default=2\n The degree of the polynomial features.\n\ninteraction_only : bool, default=False\n If true, only interaction features are produced: features that are\n products of at most ``degree`` *distinct* input features (so not\n ``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.).\n\ninclude_bias : bool, default=True\n If True (default), then include a bias column, the feature in which\n all polynomial powers are zero (i.e. a column of ones - acts as an\n intercept term in a linear model).\n\norder : {'C', 'F'}, default='C'\n Order of output array in the dense case. 'F' order is faster to\n compute, but may slow down subsequent estimators.\n\n .. 
versionadded:: 0.21\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import PolynomialFeatures\n>>> X = np.arange(6).reshape(3, 2)\n>>> X\narray([[0, 1],\n [2, 3],\n [4, 5]])\n>>> poly = PolynomialFeatures(2)\n>>> poly.fit_transform(X)\narray([[ 1., 0., 1., 0., 0., 1.],\n [ 1., 2., 3., 4., 6., 9.],\n [ 1., 4., 5., 16., 20., 25.]])\n>>> poly = PolynomialFeatures(interaction_only=True)\n>>> poly.fit_transform(X)\narray([[ 1., 0., 1., 0.],\n [ 1., 2., 3., 6.],\n [ 1., 4., 5., 20.]])\n\nAttributes\n----------\npowers_ : ndarray of shape (n_output_features, n_input_features)\n powers_[i, j] is the exponent of the jth input in the ith output.\n\nn_input_features_ : int\n The total number of input features.\n\nn_output_features_ : int\n The total number of polynomial output features. The number of output\n features is computed by iterating over all suitably sized combinations\n of input features.\n\nNotes\n-----\nBe aware that the number of features in the output array scales\npolynomially in the number of features of the input array, and\nexponentially in the degree. High degrees can cause overfitting.\n\nSee :ref:`examples/linear_model/plot_polynomial_interpolation.py\n`" - }, - { - "name": "Normalizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "norm", - "type": "Literal['l1', 'l2', 'max']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "The norm to use to normalize each non zero sample. If norm='max' is used, values will be rescaled by the maximum of the absolute values." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array or a scipy.sparse CSR matrix)." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to estimate the normalization parameters." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Do nothing and return the estimator unchanged\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to estimate the normalization parameters.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to normalize, row by row. scipy.sparse matrices should be in CSR format to avoid an un-necessary copy." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Copy the input X or not." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Scale each non zero row of X to unit norm\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to normalize, row by row. scipy.sparse matrices should be\n in CSR format to avoid an un-necessary copy.\n\ncopy : bool, default=None\n Copy the input X or not.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Normalize samples individually to unit norm.\n\nEach sample (i.e. each row of the data matrix) with at least one\nnon zero component is rescaled independently of other samples so\nthat its norm (l1, l2 or inf) equals one.\n\nThis transformer is able to work both with dense numpy arrays and\nscipy.sparse matrix (use CSR format if you want to avoid the burden of\na copy / conversion).\n\nScaling inputs to unit norms is a common operation for text\nclassification or clustering for instance. For instance the dot\nproduct of two l2-normalized TF-IDF vectors is the cosine similarity\nof the vectors and is the base similarity metric for the Vector\nSpace Model commonly used by the Information Retrieval community.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nnorm : {'l1', 'l2', 'max'}, default='l2'\n The norm to use to normalize each non zero sample. If norm='max'\n is used, values will be rescaled by the maximum of the absolute\n values.\n\ncopy : bool, default=True\n set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array or a scipy.sparse\n CSR matrix).\n\nExamples\n--------\n>>> from sklearn.preprocessing import Normalizer\n>>> X = [[4, 1, 2, 2],\n... [1, 3, 9, 3],\n... 
[5, 7, 5, 1]]\n>>> transformer = Normalizer().fit(X) # fit does nothing.\n>>> transformer\nNormalizer()\n>>> transformer.transform(X)\narray([[0.8, 0.2, 0.4, 0.4],\n [0.1, 0.3, 0.9, 0.3],\n [0.5, 0.7, 0.5, 0.1]])\n\nNotes\n-----\nThis estimator is stateless (besides constructor parameters), the\nfit method does nothing but is useful when used in a pipeline.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\nSee Also\n--------\nnormalize : Equivalent function without the estimator API." - }, - { - "name": "Binarizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "threshold", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "set to False to perform inplace binarization and avoid a copy (if the input is already a numpy array or a scipy.sparse CSR matrix)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to binarize, element by element. scipy.sparse matrices should be in CSR format to avoid an un-necessary copy." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Copy the input X or not." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Binarize each element of X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to binarize, element by element.\n scipy.sparse matrices should be in CSR format to avoid an\n un-necessary copy.\n\ncopy : bool\n Copy the input X or not.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Binarize data (set feature values to 0 or 1) according to a threshold.\n\nValues greater than the threshold map to 1, while values less than\nor equal to the threshold map to 0. 
With the default threshold of 0,\nonly positive values map to 1.\n\nBinarization is a common operation on text count data where the\nanalyst can decide to only consider the presence or absence of a\nfeature rather than a quantified number of occurrences for instance.\n\nIt can also be used as a pre-processing step for estimators that\nconsider boolean random variables (e.g. modelled using the Bernoulli\ndistribution in a Bayesian setting).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nthreshold : float, default=0.0\n Feature values below or equal to this are replaced by 0, above it by 1.\n Threshold may not be less than 0 for operations on sparse matrices.\n\ncopy : bool, default=True\n set to False to perform inplace binarization and avoid a copy (if\n the input is already a numpy array or a scipy.sparse CSR matrix).\n\nExamples\n--------\n>>> from sklearn.preprocessing import Binarizer\n>>> X = [[ 1., -1., 2.],\n... [ 2., 0., 0.],\n... [ 0., 1., -1.]]\n>>> transformer = Binarizer().fit(X) # fit does nothing.\n>>> transformer\nBinarizer()\n>>> transformer.transform(X)\narray([[1., 0., 1.],\n [1., 0., 0.],\n [0., 1., 0.]])\n\nNotes\n-----\nIf the input is a sparse matrix, only the non-zero values are subject\nto update by the Binarizer class.\n\nThis estimator is stateless (besides constructor parameters), the\nfit method does nothing but is useful when used in a pipeline.\n\nSee Also\n--------\nbinarize : Equivalent function without the estimator API." - }, - { - "name": "KernelCenterer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "K", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Kernel matrix." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit KernelCenterer\n\nParameters\n----------\nK : ndarray of shape (n_samples, n_samples)\n Kernel matrix.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "K", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Kernel matrix." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace computation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Center kernel matrix.\n\nParameters\n----------\nK : ndarray of shape (n_samples1, n_samples2)\n Kernel matrix.\n\ncopy : bool, default=True\n Set to False to perform inplace computation.\n\nReturns\n-------\nK_new : ndarray of shape (n_samples1, n_samples2)" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Center a kernel matrix.\n\nLet K(x, z) be a kernel defined by phi(x)^T phi(z), where phi is a\nfunction mapping x to a Hilbert space. 
KernelCenterer centers (i.e.,\nnormalize to have zero mean) the data without explicitly computing phi(x).\nIt is equivalent to centering phi(x) with\nsklearn.preprocessing.StandardScaler(with_std=False).\n\nRead more in the :ref:`User Guide `.\n\nAttributes\n----------\nK_fit_rows_ : array of shape (n_samples,)\n Average of each column of kernel matrix.\n\nK_fit_all_ : float\n Average of kernel matrix.\n\nExamples\n--------\n>>> from sklearn.preprocessing import KernelCenterer\n>>> from sklearn.metrics.pairwise import pairwise_kernels\n>>> X = [[ 1., -2., 2.],\n... [ -2., 1., 3.],\n... [ 4., 1., -2.]]\n>>> K = pairwise_kernels(X, metric='linear')\n>>> K\narray([[ 9., 2., -2.],\n [ 2., 14., -13.],\n [ -2., -13., 21.]])\n>>> transformer = KernelCenterer().fit(K)\n>>> transformer\nKernelCenterer()\n>>> transformer.transform(K)\narray([[ 5., 0., -5.],\n [ 0., 14., -14.],\n [ -5., -14., 19.]])" - }, - { - "name": "QuantileTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_quantiles", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Number of quantiles to be computed. It corresponds to the number of landmarks used to discretize the cumulative distribution function. If n_quantiles is larger than the number of samples, n_quantiles is set to the number of samples as a larger number of quantiles does not give a better approximation of the cumulative distribution function estimator." - }, - { - "name": "output_distribution", - "type": "Literal['uniform', 'normal']", - "hasDefault": true, - "default": "'uniform'", - "limitation": null, - "ignored": false, - "docstring": "Marginal distribution for the transformed data. The choices are 'uniform' (default) or 'normal'." 
- }, - { - "name": "ignore_implicit_zeros", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Only applies to sparse matrices. If True, the sparse entries of the matrix are discarded to compute the quantile statistics. If False, these entries are treated as zeros." - }, - { - "name": "subsample", - "type": "int", - "hasDefault": true, - "default": "1e5", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of samples used to estimate the quantiles for computational efficiency. Note that the subsampling procedure may differ for value-identical sparse and dense matrices." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for subsampling and smoothing noise. Please see ``subsample`` for more details. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace transformation and avoid a copy (if the input is already a numpy array)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_dense_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the features axis." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute percentiles for dense matrices.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The data used to scale along the features axis." 
- }, - { - "name": "_sparse_fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the features axis. The sparse matrix needs to be nonnegative. If a sparse matrix is provided, it will be converted into a sparse ``csc_matrix``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute percentiles for sparse matrices.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n The data used to scale along the features axis. The sparse matrix\n needs to be nonnegative. If a sparse matrix is provided,\n it will be converted into a sparse ``csc_matrix``." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the features axis. If a sparse matrix is provided, it will be converted into a sparse ``csc_matrix``. Additionally, the sparse matrix needs to be nonnegative if `ignore_implicit_zeros` is False." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the quantiles used for transforming.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis. If a sparse\n matrix is provided, it will be converted into a sparse\n ``csc_matrix``. Additionally, the sparse matrix needs to be\n nonnegative if `ignore_implicit_zeros` is False.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer." 
- }, - { - "name": "_transform_col", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private function to transform a single feature." - }, - { - "name": "_check_inputs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check inputs before fit and transform." - }, - { - "name": "_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the features axis." - }, - { - "name": "inverse", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If False, apply forward transform. If True, apply inverse transform." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Forward and inverse transform.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The data used to scale along the features axis.\n\ninverse : bool, default=False\n If False, apply forward transform. If True, apply\n inverse transform.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n Projected data." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the features axis. If a sparse matrix is provided, it will be converted into a sparse ``csc_matrix``. Additionally, the sparse matrix needs to be nonnegative if `ignore_implicit_zeros` is False." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Feature-wise transformation of the data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis. 
If a sparse\n matrix is provided, it will be converted into a sparse\n ``csc_matrix``. Additionally, the sparse matrix needs to be\n nonnegative if `ignore_implicit_zeros` is False.\n\nReturns\n-------\nXt : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The projected data." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to scale along the features axis. If a sparse matrix is provided, it will be converted into a sparse ``csc_matrix``. Additionally, the sparse matrix needs to be nonnegative if `ignore_implicit_zeros` is False." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Back-projection to the original space.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis. If a sparse\n matrix is provided, it will be converted into a sparse\n ``csc_matrix``. Additionally, the sparse matrix needs to be\n nonnegative if `ignore_implicit_zeros` is False.\n\nReturns\n-------\nXt : {ndarray, sparse matrix} of (n_samples, n_features)\n The projected data." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transform features using quantiles information.\n\nThis method transforms the features to follow a uniform or a normal\ndistribution. Therefore, for a given feature, this transformation tends\nto spread out the most frequent values. It also reduces the impact of\n(marginal) outliers: this is therefore a robust preprocessing scheme.\n\nThe transformation is applied on each feature independently. First an\nestimate of the cumulative distribution function of a feature is\nused to map the original values to a uniform distribution. 
The obtained\nvalues are then mapped to the desired output distribution using the\nassociated quantile function. Features values of new/unseen data that fall\nbelow or above the fitted range will be mapped to the bounds of the output\ndistribution. Note that this transform is non-linear. It may distort linear\ncorrelations between variables measured at the same scale but renders\nvariables measured at different scales more directly comparable.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.19\n\nParameters\n----------\nn_quantiles : int, default=1000 or n_samples\n Number of quantiles to be computed. It corresponds to the number\n of landmarks used to discretize the cumulative distribution function.\n If n_quantiles is larger than the number of samples, n_quantiles is set\n to the number of samples as a larger number of quantiles does not give\n a better approximation of the cumulative distribution function\n estimator.\n\noutput_distribution : {'uniform', 'normal'}, default='uniform'\n Marginal distribution for the transformed data. The choices are\n 'uniform' (default) or 'normal'.\n\nignore_implicit_zeros : bool, default=False\n Only applies to sparse matrices. If True, the sparse entries of the\n matrix are discarded to compute the quantile statistics. If False,\n these entries are treated as zeros.\n\nsubsample : int, default=1e5\n Maximum number of samples used to estimate the quantiles for\n computational efficiency. 
Note that the subsampling procedure may\n differ for value-identical sparse and dense matrices.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for subsampling and smoothing\n noise.\n Please see ``subsample`` for more details.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `\n\ncopy : bool, default=True\n Set to False to perform inplace transformation and avoid a copy (if the\n input is already a numpy array).\n\nAttributes\n----------\nn_quantiles_ : int\n The actual number of quantiles used to discretize the cumulative\n distribution function.\n\nquantiles_ : ndarray of shape (n_quantiles, n_features)\n The values corresponding the quantiles of reference.\n\nreferences_ : ndarray of shape (n_quantiles, )\n Quantiles of references.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import QuantileTransformer\n>>> rng = np.random.RandomState(0)\n>>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)\n>>> qt = QuantileTransformer(n_quantiles=10, random_state=0)\n>>> qt.fit_transform(X)\narray([...])\n\nSee Also\n--------\nquantile_transform : Equivalent function without the estimator API.\nPowerTransformer : Perform mapping to a normal distribution using a power\n transform.\nStandardScaler : Perform standardization that is faster, but less robust\n to outliers.\nRobustScaler : Perform robust standardization that removes the influence\n of outliers but does not put outliers and inliers on the same scale.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`." 
- }, - { - "name": "PowerTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "method", - "type": "Literal['yeo-johnson', 'box-cox']", - "hasDefault": true, - "default": "'yeo-johnson'", - "limitation": null, - "ignored": false, - "docstring": "The power transform method. Available methods are: - 'yeo-johnson' [1]_, works with positive and negative values - 'box-cox' [2]_, only works with strictly positive values" - }, - { - "name": "standardize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to True to apply zero-mean, unit-variance normalization to the transformed output." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace computation during transformation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data used to estimate the optimal transformation parameters." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Estimate the optimal parameter lambda for each feature.\n\nThe optimal lambda parameter for minimizing skewness is estimated on\neach feature independently using maximum likelihood.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data used to estimate the optimal transformation parameters.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer." 
- }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to be transformed using a power transformation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the power transform to each feature using the fitted lambdas.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data to be transformed using a power transformation.\n\nReturns\n-------\nX_trans : ndarray of shape (n_samples, n_features)\n The transformed data." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The transformed data." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Apply the inverse power transformation using the fitted lambdas.\n\nThe inverse of the Box-Cox transformation is given by::\n\n if lambda_ == 0:\n X = exp(X_trans)\n else:\n X = (X_trans * lambda_ + 1) ** (1 / lambda_)\n\nThe inverse of the Yeo-Johnson transformation is given by::\n\n if X >= 0 and lambda_ == 0:\n X = exp(X_trans) - 1\n elif X >= 0 and lambda_ != 0:\n X = (X_trans * lambda_ + 1) ** (1 / lambda_) - 1\n elif X < 0 and lambda_ != 2:\n X = 1 - (-(2 - lambda_) * X_trans + 1) ** (1 / (2 - lambda_))\n elif X < 0 and lambda_ == 2:\n X = 1 - exp(-X_trans)\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The transformed data.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The original data." - }, - { - "name": "_box_cox_inverse_tranform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return inverse-transformed input x following Box-Cox inverse\ntransform with parameter lambda." - }, - { - "name": "_yeo_johnson_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return inverse-transformed input x following Yeo-Johnson inverse\ntransform with parameter lambda." - }, - { - "name": "_yeo_johnson_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return transformed input x following Yeo-Johnson transform with\nparameter lambda." - }, - { - "name": "_box_cox_optimize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find and return optimal lambda parameter of the Box-Cox transform by\nMLE, for observed data x.\n\nWe here use scipy builtins which uses the brent optimizer." 
- }, - { - "name": "_yeo_johnson_optimize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find and return optimal lambda parameter of the Yeo-Johnson\ntransform by MLE, for observed data x.\n\nLike for Box-Cox, MLE is done via the brent optimizer." - }, - { - "name": "_check_input", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "in_fit", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether or not `_check_input` is called from `fit` or other methods, e.g. `predict`, `transform`, etc." - }, - { - "name": "check_positive", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, check that all data is positive and non-zero (only if ``self.method=='box-cox'``)." - }, - { - "name": "check_shape", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, check that n_features matches the length of self.lambdas_" - }, - { - "name": "check_method", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, check that the transformation method is valid." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate the input before fit and transform.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nin_fit : bool\n Whether or not `_check_input` is called from `fit` or other\n methods, e.g. 
`predict`, `transform`, etc.\n\ncheck_positive : bool, default=False\n If True, check that all data is positive and non-zero (only if\n ``self.method=='box-cox'``).\n\ncheck_shape : bool, default=False\n If True, check that n_features matches the length of self.lambdas_\n\ncheck_method : bool, default=False\n If True, check that the transformation method is valid." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Apply a power transform featurewise to make data more Gaussian-like.\n\nPower transforms are a family of parametric, monotonic transformations\nthat are applied to make data more Gaussian-like. This is useful for\nmodeling issues related to heteroscedasticity (non-constant variance),\nor other situations where normality is desired.\n\nCurrently, PowerTransformer supports the Box-Cox transform and the\nYeo-Johnson transform. The optimal parameter for stabilizing variance and\nminimizing skewness is estimated through maximum likelihood.\n\nBox-Cox requires input data to be strictly positive, while Yeo-Johnson\nsupports both positive or negative data.\n\nBy default, zero-mean, unit-variance normalization is applied to the\ntransformed data.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nmethod : {'yeo-johnson', 'box-cox'}, default='yeo-johnson'\n The power transform method. 
Available methods are:\n\n - 'yeo-johnson' [1]_, works with positive and negative values\n - 'box-cox' [2]_, only works with strictly positive values\n\nstandardize : bool, default=True\n Set to True to apply zero-mean, unit-variance normalization to the\n transformed output.\n\ncopy : bool, default=True\n Set to False to perform inplace computation during transformation.\n\nAttributes\n----------\nlambdas_ : ndarray of float of shape (n_features,)\n The parameters of the power transformation for the selected features.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import PowerTransformer\n>>> pt = PowerTransformer()\n>>> data = [[1, 2], [3, 2], [4, 5]]\n>>> print(pt.fit(data))\nPowerTransformer()\n>>> print(pt.lambdas_)\n[ 1.386... -3.100...]\n>>> print(pt.transform(data))\n[[-1.316... -0.707...]\n [ 0.209... -0.707...]\n [ 1.106... 1.414...]]\n\nSee Also\n--------\npower_transform : Equivalent function without the estimator API.\n\nQuantileTransformer : Maps data to a standard normal distribution with\n the parameter `output_distribution='normal'`.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in ``fit``, and maintained\nin ``transform``.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\nReferences\n----------\n\n.. [1] I.K. Yeo and R.A. Johnson, \"A new family of power transformations to\n improve normality or symmetry.\" Biometrika, 87(4), pp.954-959,\n (2000).\n\n.. [2] G.E.P. Box and D.R. Cox, \"An Analysis of Transformations\", Journal\n of the Royal Statistical Society B, 26, 211-252 (1964)." - } - ], - "functions": [ - { - "name": "_handle_zeros_in_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Makes sure that whenever scale is zero, we handle it correctly.\n\nThis happens in most scalers when we have constant features." 
- }, - { - "name": "scale", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to center and scale." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "axis used to compute the means and standard deviations along. If 0, independently standardize each feature, otherwise (if 1) standardize each sample." - }, - { - "name": "with_mean", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, center the data before scaling." - }, - { - "name": "with_std", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, scale the data to unit variance (or equivalently, unit standard deviation)." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array or a scipy.sparse CSC matrix and if axis is 1)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Standardize a dataset along any axis.\n\nCenter to the mean and component wise scale to unit variance.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to center and scale.\n\naxis : int, default=0\n axis used to compute the means and standard deviations along. 
If 0,\n independently standardize each feature, otherwise (if 1) standardize\n each sample.\n\nwith_mean : bool, default=True\n If True, center the data before scaling.\n\nwith_std : bool, default=True\n If True, scale the data to unit variance (or equivalently,\n unit standard deviation).\n\ncopy : bool, default=True\n set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array or a scipy.sparse\n CSC matrix and if axis is 1).\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\nNotes\n-----\nThis implementation will refuse to center scipy.sparse matrices\nsince it would make them non-sparse and would potentially crash the\nprogram with memory exhaustion problems.\n\nInstead the caller is expected to either set explicitly\n`with_mean=False` (in that case, only variance scaling will be\nperformed on the features of the CSC matrix) or to call `X.toarray()`\nif he/she expects the materialized dense array to fit in memory.\n\nTo avoid memory copy the caller should pass a CSC matrix.\n\nNaNs are treated as missing values: disregarded to compute the statistics,\nand maintained during the data transformation.\n\nWe use a biased estimator for the standard deviation, equivalent to\n`numpy.std(x, ddof=0)`. Note that the choice of `ddof` is unlikely to\naffect model performance.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\n.. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.scale` unless you know\n what you are doing. A common mistake is to apply it to the entire data\n *before* splitting into training and test sets. 
This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.StandardScaler` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking: `pipe = make_pipeline(StandardScaler(), LogisticRegression())`.\n\nSee Also\n--------\nStandardScaler : Performs scaling to unit variance using the Transformer\n API (e.g. as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`)." - }, - { - "name": "minmax_scale", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - }, - { - "name": "feature_range", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Desired range of transformed data." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Axis used to scale along. If 0, independently scale each feature, otherwise (if 1) scale each sample." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace scaling and avoid a copy (if the input is already a numpy array)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, i.e. 
between\nzero and one.\n\nThe transformation is given by (when ``axis=0``)::\n\n X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n X_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThe transformation is calculated as (when ``axis=0``)::\n\n X_scaled = scale * X + min - X.min(axis=0) * scale\n where scale = (max - min) / (X.max(axis=0) - X.min(axis=0))\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n *minmax_scale* function interface\n to :class:`~sklearn.preprocessing.MinMaxScaler`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data.\n\nfeature_range : tuple (min, max), default=(0, 1)\n Desired range of transformed data.\n\naxis : int, default=0\n Axis used to scale along. If 0, independently scale each feature,\n otherwise (if 1) scale each sample.\n\ncopy : bool, default=True\n Set to False to perform inplace scaling and avoid a copy (if the input\n is already a numpy array).\n\nReturns\n-------\nX_tr : ndarray of shape (n_samples, n_features)\n The transformed data.\n\n.. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.minmax_scale` unless you know\n what you are doing. A common mistake is to apply it to the entire data\n *before* splitting into training and test sets. This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.MinMaxScaler` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking: `pipe = make_pipeline(MinMaxScaler(), LogisticRegression())`.\n\nSee Also\n--------\nMinMaxScaler : Performs scaling to a given range using the Transformer\n API (e.g. 
as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\n\nNotes\n-----\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`." - }, - { - "name": "maxabs_scale", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "axis used to scale along. If 0, independently scale each feature, otherwise (if 1) scale each sample." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace scaling and avoid a copy (if the input is already a numpy array)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Scale each feature to the [-1, 1] range without breaking the sparsity.\n\nThis estimator scales each feature individually such\nthat the maximal absolute value of each feature in the\ntraining set will be 1.0.\n\nThis scaler can also be applied to sparse CSR or CSC matrices.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data.\n\naxis : int, default=0\n axis used to scale along. If 0, independently scale each feature,\n otherwise (if 1) scale each sample.\n\ncopy : bool, default=True\n Set to False to perform inplace scaling and avoid a copy (if the input\n is already a numpy array).\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\n.. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.maxabs_scale` unless you know what\n you are doing. A common mistake is to apply it to the entire data\n *before* splitting into training and test sets. 
This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.MaxAbsScaler` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking: `pipe = make_pipeline(MaxAbsScaler(), LogisticRegression())`.\n\nSee Also\n--------\nMaxAbsScaler : Performs scaling to the [-1, 1] range using\n the Transformer API (e.g. as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\n\nNotes\n-----\nNaNs are treated as missing values: disregarded to compute the statistics,\nand maintained during the data transformation.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`." - }, - { - "name": "robust_scale", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to center and scale." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "axis used to compute the medians and IQR along. If 0, independently scale each feature, otherwise (if 1) scale each sample." - }, - { - "name": "with_centering", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, center the data before scaling." - }, - { - "name": "with_scaling", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, scale the data to unit variance (or equivalently, unit standard deviation)." 
- }, - { - "name": "quantile_range", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "default=(25.0, 75.0), == (1st quantile, 3rd quantile), == IQR Quantile range used to calculate ``scale_``. .. versionadded:: 0.18" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array or a scipy.sparse CSR matrix and if axis is 1)." - }, - { - "name": "unit_variance", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, scale data so that normally distributed features have a variance of 1. In general, if the difference between the x-values of ``q_max`` and ``q_min`` for a standard normal distribution is greater than 1, the dataset will be scaled down. If less than 1, the dataset will be scaled up. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Standardize a dataset along any axis\n\nCenter to the median and component wise scale\naccording to the interquartile range.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_sample, n_features)\n The data to center and scale.\n\naxis : int, default=0\n axis used to compute the medians and IQR along. If 0,\n independently scale each feature, otherwise (if 1) scale\n each sample.\n\nwith_centering : bool, default=True\n If True, center the data before scaling.\n\nwith_scaling : bool, default=True\n If True, scale the data to unit variance (or equivalently,\n unit standard deviation).\n\nquantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0\n default=(25.0, 75.0), == (1st quantile, 3rd quantile), == IQR\n Quantile range used to calculate ``scale_``.\n\n .. 
versionadded:: 0.18\n\ncopy : bool, default=True\n set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array or a scipy.sparse\n CSR matrix and if axis is 1).\n\nunit_variance : bool, default=False\n If True, scale data so that normally distributed features have a\n variance of 1. In general, if the difference between the x-values of\n ``q_max`` and ``q_min`` for a standard normal distribution is greater\n than 1, the dataset will be scaled down. If less than 1, the dataset\n will be scaled up.\n\n .. versionadded:: 0.24\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\nNotes\n-----\nThis implementation will refuse to center scipy.sparse matrices\nsince it would make them non-sparse and would potentially crash the\nprogram with memory exhaustion problems.\n\nInstead the caller is expected to either set explicitly\n`with_centering=False` (in that case, only variance scaling will be\nperformed on the features of the CSR matrix) or to call `X.toarray()`\nif he/she expects the materialized dense array to fit in memory.\n\nTo avoid memory copy the caller should pass a CSR matrix.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\n.. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.robust_scale` unless you know\n what you are doing. A common mistake is to apply it to the entire data\n *before* splitting into training and test sets. 
This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.RobustScaler` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking: `pipe = make_pipeline(RobustScaler(), LogisticRegression())`.\n\nSee Also\n--------\nRobustScaler : Performs centering and scaling using the Transformer API\n (e.g. as part of a preprocessing :class:`~sklearn.pipeline.Pipeline`)." - }, - { - "name": "normalize", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to normalize, element by element. scipy.sparse matrices should be in CSR format to avoid an un-necessary copy." - }, - { - "name": "norm", - "type": "Literal['l1', 'l2', 'max']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "The norm to use to normalize each non zero sample (or each non-zero feature if axis is 0)." - }, - { - "name": "axis", - "type": "Literal[0, 1]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "axis used to normalize the data along. If 1, independently normalize each sample, otherwise (if 0) normalize each feature." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array or a scipy.sparse CSR matrix and if axis is 1)." 
- }, - { - "name": "return_norm", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "whether to return the computed norms" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Scale input vectors individually to unit norm (vector length).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to normalize, element by element.\n scipy.sparse matrices should be in CSR format to avoid an\n un-necessary copy.\n\nnorm : {'l1', 'l2', 'max'}, default='l2'\n The norm to use to normalize each non zero sample (or each non-zero\n feature if axis is 0).\n\naxis : {0, 1}, default=1\n axis used to normalize the data along. If 1, independently normalize\n each sample, otherwise (if 0) normalize each feature.\n\ncopy : bool, default=True\n set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array or a scipy.sparse\n CSR matrix and if axis is 1).\n\nreturn_norm : bool, default=False\n whether to return the computed norms\n\nReturns\n-------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Normalized input X.\n\nnorms : ndarray of shape (n_samples, ) if axis=1 else (n_features, )\n An array of norms along given axis for X.\n When X is sparse, a NotImplementedError will be raised\n for norm 'l1' or 'l2'.\n\nSee Also\n--------\nNormalizer : Performs normalization using the Transformer API\n (e.g. as part of a preprocessing :class:`~sklearn.pipeline.Pipeline`).\n\nNotes\n-----\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`." 
- }, - { - "name": "binarize", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to binarize, element by element. scipy.sparse matrices should be in CSR or CSC format to avoid an un-necessary copy." - }, - { - "name": "threshold", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "set to False to perform inplace binarization and avoid a copy (if the input is already a numpy array or a scipy.sparse CSR / CSC matrix and if axis is 1)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Boolean thresholding of array-like or scipy.sparse matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to binarize, element by element.\n scipy.sparse matrices should be in CSR or CSC format to avoid an\n un-necessary copy.\n\nthreshold : float, default=0.0\n Feature values below or equal to this are replaced by 0, above it by 1.\n Threshold may not be less than 0 for operations on sparse matrices.\n\ncopy : bool, default=True\n set to False to perform inplace binarization and avoid a copy\n (if the input is already a numpy array or a scipy.sparse CSR / CSC\n matrix and if axis is 1).\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\nSee Also\n--------\nBinarizer : Performs binarization using the Transformer API\n (e.g. as part of a preprocessing :class:`~sklearn.pipeline.Pipeline`)." 
- }, - { - "name": "add_dummy_feature", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data." - }, - { - "name": "value", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Value to use for the dummy feature." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Augment dataset with an additional dummy feature.\n\nThis is useful for fitting an intercept term with implementations which\ncannot otherwise fit it directly.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\nvalue : float\n Value to use for the dummy feature.\n\nReturns\n-------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features + 1)\n Same data with dummy feature added as first column.\n\nExamples\n--------\n>>> from sklearn.preprocessing import add_dummy_feature\n>>> add_dummy_feature([[0, 1], [1, 0]])\narray([[1., 0., 1.],\n [1., 1., 0.]])" - }, - { - "name": "quantile_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to transform." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Axis used to compute the means and standard deviations along. If 0, transform each feature, otherwise (if 1) transform each sample." - }, - { - "name": "n_quantiles", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Number of quantiles to be computed. It corresponds to the number of landmarks used to discretize the cumulative distribution function. 
If n_quantiles is larger than the number of samples, n_quantiles is set to the number of samples as a larger number of quantiles does not give a better approximation of the cumulative distribution function estimator." - }, - { - "name": "output_distribution", - "type": "Literal['uniform', 'normal']", - "hasDefault": true, - "default": "'uniform'", - "limitation": null, - "ignored": false, - "docstring": "Marginal distribution for the transformed data. The choices are 'uniform' (default) or 'normal'." - }, - { - "name": "ignore_implicit_zeros", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Only applies to sparse matrices. If True, the sparse entries of the matrix are discarded to compute the quantile statistics. If False, these entries are treated as zeros." - }, - { - "name": "subsample", - "type": "int", - "hasDefault": true, - "default": "1e5", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of samples used to estimate the quantiles for computational efficiency. Note that the subsampling procedure may differ for value-identical sparse and dense matrices." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for subsampling and smoothing noise. Please see ``subsample`` for more details. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace transformation and avoid a copy (if the input is already a numpy array). If True, a copy of `X` is transformed, leaving the original `X` unchanged ..versionchanged:: 0.23 The default value of `copy` changed from False to True in 0.23." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform features using quantiles information.\n\nThis method transforms the features to follow a uniform or a normal\ndistribution. Therefore, for a given feature, this transformation tends\nto spread out the most frequent values. It also reduces the impact of\n(marginal) outliers: this is therefore a robust preprocessing scheme.\n\nThe transformation is applied on each feature independently. First an\nestimate of the cumulative distribution function of a feature is\nused to map the original values to a uniform distribution. The obtained\nvalues are then mapped to the desired output distribution using the\nassociated quantile function. Features values of new/unseen data that fall\nbelow or above the fitted range will be mapped to the bounds of the output\ndistribution. Note that this transform is non-linear. It may distort linear\ncorrelations between variables measured at the same scale but renders\nvariables measured at different scales more directly comparable.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to transform.\n\naxis : int, default=0\n Axis used to compute the means and standard deviations along. If 0,\n transform each feature, otherwise (if 1) transform each sample.\n\nn_quantiles : int, default=1000 or n_samples\n Number of quantiles to be computed. It corresponds to the number\n of landmarks used to discretize the cumulative distribution function.\n If n_quantiles is larger than the number of samples, n_quantiles is set\n to the number of samples as a larger number of quantiles does not give\n a better approximation of the cumulative distribution function\n estimator.\n\noutput_distribution : {'uniform', 'normal'}, default='uniform'\n Marginal distribution for the transformed data. 
The choices are\n 'uniform' (default) or 'normal'.\n\nignore_implicit_zeros : bool, default=False\n Only applies to sparse matrices. If True, the sparse entries of the\n matrix are discarded to compute the quantile statistics. If False,\n these entries are treated as zeros.\n\nsubsample : int, default=1e5\n Maximum number of samples used to estimate the quantiles for\n computational efficiency. Note that the subsampling procedure may\n differ for value-identical sparse and dense matrices.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for subsampling and smoothing\n noise.\n Please see ``subsample`` for more details.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `\n\ncopy : bool, default=True\n Set to False to perform inplace transformation and avoid a copy (if the\n input is already a numpy array). If True, a copy of `X` is transformed,\n leaving the original `X` unchanged\n\n ..versionchanged:: 0.23\n The default value of `copy` changed from False to True in 0.23.\n\nReturns\n-------\nXt : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import quantile_transform\n>>> rng = np.random.RandomState(0)\n>>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)\n>>> quantile_transform(X, n_quantiles=10, random_state=0, copy=True)\narray([...])\n\nSee Also\n--------\nQuantileTransformer : Performs quantile-based scaling using the\n Transformer API (e.g. 
as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\npower_transform : Maps data to a normal distribution using a\n power transformation.\nscale : Performs standardization that is faster, but less robust\n to outliers.\nrobust_scale : Performs robust standardization that removes the influence\n of outliers but does not put outliers and inliers on the same scale.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\n.. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.quantile_transform` unless\n you know what you are doing. A common mistake is to apply it\n to the entire data *before* splitting into training and\n test sets. This will bias the model evaluation because\n information would have leaked from the test set to the\n training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.QuantileTransformer` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking:`pipe = make_pipeline(QuantileTransformer(),\n LogisticRegression())`.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`." - }, - { - "name": "power_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to be transformed using a power transformation." - }, - { - "name": "method", - "type": "Literal['yeo-johnson', 'box-cox']", - "hasDefault": true, - "default": "'yeo-johnson'", - "limitation": null, - "ignored": false, - "docstring": "The power transform method. Available methods are: - 'yeo-johnson' [1]_, works with positive and negative values - 'box-cox' [2]_, only works with strictly positive values .. versionchanged:: 0.23 The default value of the `method` parameter changed from 'box-cox' to 'yeo-johnson' in 0.23." 
- }, - { - "name": "standardize", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to True to apply zero-mean, unit-variance normalization to the transformed output." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Set to False to perform inplace computation during transformation." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Power transforms are a family of parametric, monotonic transformations\nthat are applied to make data more Gaussian-like. This is useful for\nmodeling issues related to heteroscedasticity (non-constant variance),\nor other situations where normality is desired.\n\nCurrently, power_transform supports the Box-Cox transform and the\nYeo-Johnson transform. The optimal parameter for stabilizing variance and\nminimizing skewness is estimated through maximum likelihood.\n\nBox-Cox requires input data to be strictly positive, while Yeo-Johnson\nsupports both positive or negative data.\n\nBy default, zero-mean, unit-variance normalization is applied to the\ntransformed data.\n\nRead more in the :ref:`User Guide `.\n\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data to be transformed using a power transformation.\n\nmethod : {'yeo-johnson', 'box-cox'}, default='yeo-johnson'\n The power transform method. Available methods are:\n\n - 'yeo-johnson' [1]_, works with positive and negative values\n - 'box-cox' [2]_, only works with strictly positive values\n\n .. 
versionchanged:: 0.23\n The default value of the `method` parameter changed from\n 'box-cox' to 'yeo-johnson' in 0.23.\n\nstandardize : bool, default=True\n Set to True to apply zero-mean, unit-variance normalization to the\n transformed output.\n\ncopy : bool, default=True\n Set to False to perform inplace computation during transformation.\n\nReturns\n-------\nX_trans : ndarray of shape (n_samples, n_features)\n The transformed data.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import power_transform\n>>> data = [[1, 2], [3, 2], [4, 5]]\n>>> print(power_transform(data, method='box-cox'))\n[[-1.332... -0.707...]\n [ 0.256... -0.707...]\n [ 1.076... 1.414...]]\n\n.. warning:: Risk of data leak.\n Do not use :func:`~sklearn.preprocessing.power_transform` unless you\n know what you are doing. A common mistake is to apply it to the entire\n data *before* splitting into training and test sets. This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.PowerTransformer` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking, e.g.: `pipe = make_pipeline(PowerTransformer(),\n LogisticRegression())`.\n\nSee Also\n--------\nPowerTransformer : Equivalent transformation with the\n Transformer API (e.g. as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\n\nquantile_transform : Maps data to a standard normal distribution with\n the parameter `output_distribution='normal'`.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in ``fit``, and maintained\nin ``transform``.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\nReferences\n----------\n\n.. [1] I.K. Yeo and R.A. 
Johnson, \"A new family of power transformations to\n improve normality or symmetry.\" Biometrika, 87(4), pp.954-959,\n (2000).\n\n.. [2] G.E.P. Box and D.R. Cox, \"An Analysis of Transformations\", Journal\n of the Royal Statistical Society B, 26, 211-252 (1964)." - } - ] - }, - { - "name": "sklearn.preprocessing._discretization", - "imports": [ - "import numbers", - "import numpy as np", - "import warnings", - "from None import OneHotEncoder", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils.validation import check_array", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from cluster import KMeans" - ], - "classes": [ - { - "name": "KBinsDiscretizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "n_bins", - "type": "Union[ArrayLike, int]", - "hasDefault": true, - "default": "5", - "limitation": null, - "ignored": false, - "docstring": "The number of bins to produce. Raises ValueError if ``n_bins < 2``." - }, - { - "name": "encode", - "type": "Literal['onehot', 'onehot-dense', 'ordinal']", - "hasDefault": true, - "default": "'onehot'", - "limitation": null, - "ignored": false, - "docstring": "Method used to encode the transformed result. onehot Encode the transformed result with one-hot encoding and return a sparse matrix. Ignored features are always stacked to the right. onehot-dense Encode the transformed result with one-hot encoding and return a dense array. Ignored features are always stacked to the right. ordinal Return the bin identifier encoded as an integer value." - }, - { - "name": "strategy", - "type": "Literal['uniform', 'quantile', 'kmeans']", - "hasDefault": true, - "default": "'quantile'", - "limitation": null, - "ignored": false, - "docstring": "Strategy used to define the widths of the bins. uniform All bins in each feature have identical widths. 
quantile All bins in each feature have the same number of points. kmeans Values in each bin have the same nearest center of a 1D k-means cluster." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The desired data-type for the output. If None, output dtype is consistent with input dtype. Only np.float32 and np.float64 are supported. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to be discretized." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored. This parameter exists only for compatibility with :class:`~sklearn.pipeline.Pipeline`." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the estimator.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to be discretized.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nself" - }, - { - "name": "_validate_n_bins", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns n_bins_, the number of bins per feature.\n " - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to be discretized." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Discretize the data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to be discretized.\n\nReturns\n-------\nXt : {ndarray, sparse matrix}, dtype={np.float32, np.float64}\n Data in the binned space. Will be a sparse matrix if\n `self.encode='onehot'` and ndarray otherwise." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "Xt", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Transformed data in the binned space." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform discretized data back to original feature space.\n\nNote that this function does not regenerate the original data\ndue to discretization rounding.\n\nParameters\n----------\nXt : array-like of shape (n_samples, n_features)\n Transformed data in the binned space.\n\nReturns\n-------\nXinv : ndarray, dtype={np.float32, np.float64}\n Data in the original feature space." - } - ], - "docstring": "Bin continuous data into intervals.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nn_bins : int or array-like of shape (n_features,), default=5\n The number of bins to produce. Raises ValueError if ``n_bins < 2``.\n\nencode : {'onehot', 'onehot-dense', 'ordinal'}, default='onehot'\n Method used to encode the transformed result.\n\n onehot\n Encode the transformed result with one-hot encoding\n and return a sparse matrix. Ignored features are always\n stacked to the right.\n onehot-dense\n Encode the transformed result with one-hot encoding\n and return a dense array. 
Ignored features are always\n stacked to the right.\n ordinal\n Return the bin identifier encoded as an integer value.\n\nstrategy : {'uniform', 'quantile', 'kmeans'}, default='quantile'\n Strategy used to define the widths of the bins.\n\n uniform\n All bins in each feature have identical widths.\n quantile\n All bins in each feature have the same number of points.\n kmeans\n Values in each bin have the same nearest center of a 1D k-means\n cluster.\n\ndtype : {np.float32, np.float64}, default=None\n The desired data-type for the output. If None, output dtype is\n consistent with input dtype. Only np.float32 and np.float64 are\n supported.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nn_bins_ : ndarray of shape (n_features,), dtype=np.int_\n Number of bins per feature. Bins whose width are too small\n (i.e., <= 1e-8) are removed with a warning.\n\nbin_edges_ : ndarray of ndarray of shape (n_features,)\n The edges of each bin. Contain arrays of varying shapes ``(n_bins_, )``\n Ignored features will have empty arrays.\n\nSee Also\n--------\nBinarizer : Class used to bin values as ``0`` or\n ``1`` based on a parameter ``threshold``.\n\nNotes\n-----\nIn bin edges for feature ``i``, the first and last values are used only for\n``inverse_transform``. During transform, bin edges are extended to::\n\n np.concatenate([-np.inf, bin_edges_[i][1:-1], np.inf])\n\nYou can combine ``KBinsDiscretizer`` with\n:class:`~sklearn.compose.ColumnTransformer` if you only want to preprocess\npart of the features.\n\n``KBinsDiscretizer`` might produce constant features (e.g., when\n``encode = 'onehot'`` and certain bins do not contain any data).\nThese features can be removed with feature selection algorithms\n(e.g., :class:`~sklearn.feature_selection.VarianceThreshold`).\n\nExamples\n--------\n>>> X = [[-2, 1, -4, -1],\n... [-1, 2, -3, -0.5],\n... [ 0, 3, -2, 0.5],\n... 
[ 1, 4, -1, 2]]\n>>> est = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')\n>>> est.fit(X)\nKBinsDiscretizer(...)\n>>> Xt = est.transform(X)\n>>> Xt # doctest: +SKIP\narray([[ 0., 0., 0., 0.],\n [ 1., 1., 1., 0.],\n [ 2., 2., 2., 1.],\n [ 2., 2., 2., 2.]])\n\nSometimes it may be useful to convert the data back into the original\nfeature space. The ``inverse_transform`` function converts the binned\ndata into the original feature space. Each value will be equal to the mean\nof the two bin edges.\n\n>>> est.bin_edges_[0]\narray([-2., -1., 0., 1.])\n>>> est.inverse_transform(Xt)\narray([[-1.5, 1.5, -3.5, -0.5],\n [-0.5, 2.5, -2.5, -0.5],\n [ 0.5, 3.5, -1.5, 0.5],\n [ 0.5, 3.5, -1.5, 1.5]])" - } - ], - "functions": [] - }, - { - "name": "sklearn.preprocessing._encoders", - "imports": [ - "import numpy as np", - "from scipy import sparse", - "import numbers", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils import check_array", - "from utils import is_scalar_nan", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from utils._encode import _encode", - "from utils._encode import _check_unknown", - "from utils._encode import _unique" - ], - "classes": [ - { - "name": "_BaseEncoder", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_check_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform custom check_array:\n- convert list of strings to object dtype\n- check for missing values for object dtype data (check_array does\n not do that)\n- return list of features (arrays): this list of features is\n constructed feature by feature to preserve the data types\n of pandas DataFrame columns, as otherwise information is lost\n and cannot be 
used, eg for the `categories_` attribute." - }, - { - "name": "_get_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for encoders that includes the code to categorize and\ntransform the input features." - }, - { - "name": "OneHotEncoder", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "categories", - "type": "Literal['auto']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Categories (unique values) per feature: - 'auto' : Determine categories automatically from the training data. - list : ``categories[i]`` holds the categories expected in the ith column. The passed categories should not mix strings and numeric values within a single feature, and should be sorted in case of numeric values. The used categories can be found in the ``categories_`` attribute. .. versionadded:: 0.20" - }, - { - "name": "drop", - "type": "Literal['first', 'if_binary']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies a methodology to use to drop one of the categories per feature. This is useful in situations where perfectly collinear features cause problems, such as when feeding the resulting data into a neural network or an unregularized regression. 
However, dropping one category breaks the symmetry of the original representation and can therefore induce a bias in downstream models, for instance for penalized linear classification or regression models. - None : retain all features (the default). - 'first' : drop the first category in each feature. If only one category is present, the feature will be dropped entirely. - 'if_binary' : drop the first category in each feature with two categories. Features with 1 or more than 2 categories are left intact. - array : ``drop[i]`` is the category in feature ``X[:, i]`` that should be dropped. .. versionchanged:: 0.23 Added option 'if_binary'." - }, - { - "name": "sparse", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Will return sparse matrix if set True else will return an array." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "float", - "limitation": null, - "ignored": false, - "docstring": "Desired dtype of output." - }, - { - "name": "handle_unknown", - "type": "Literal['error', 'ignore']", - "hasDefault": true, - "default": "'error'", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise an error or ignore if an unknown categorical feature is present during transform (default is to raise). When this parameter is set to 'ignore' and an unknown category is encountered during transform, the resulting one-hot encoded columns for this feature will be all zeros. In the inverse transform, an unknown category will be denoted as None." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_keywords", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_compute_drop_idx", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to determine the categories of each feature." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored. This parameter exists only for compatibility with :class:`~sklearn.pipeline.Pipeline`." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit OneHotEncoder to X.\n\nParameters\n----------\nX : array-like, shape [n_samples, n_features]\n The data to determine the categories of each feature.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nself" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to encode." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored. This parameter exists only for compatibility with :class:`~sklearn.pipeline.Pipeline`." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit OneHotEncoder to X, then transform X.\n\nEquivalent to fit(X).transform(X) but more convenient.\n\nParameters\n----------\nX : array-like, shape [n_samples, n_features]\n The data to encode.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nX_out : sparse matrix if sparse=True else a 2-d array\n Transformed input." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to encode." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform X using one-hot encoding.\n\nParameters\n----------\nX : array-like, shape [n_samples, n_features]\n The data to encode.\n\nReturns\n-------\nX_out : sparse matrix if sparse=True else a 2-d array\n Transformed input." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The transformed data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convert the data back to the original representation.\n\nIn case unknown categories are encountered (all zeros in the\none-hot encoding), ``None`` is used to represent this category.\n\nParameters\n----------\nX : array-like or sparse matrix, shape [n_samples, n_encoded_features]\n The transformed data.\n\nReturns\n-------\nX_tr : array-like, shape [n_samples, n_features]\n Inverse transformed array." 
- }, - { - "name": "get_feature_names", - "decorators": [], - "parameters": [ - { - "name": "input_features", - "type": "List[str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "String names for input features if available. By default, \"x0\", \"x1\", ... \"xn_features\" is used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return feature names for output features.\n\nParameters\n----------\ninput_features : list of str of shape (n_features,)\n String names for input features if available. By default,\n \"x0\", \"x1\", ... \"xn_features\" is used.\n\nReturns\n-------\noutput_feature_names : ndarray of shape (n_output_features,)\n Array of feature names." - } - ], - "docstring": "Encode categorical features as a one-hot numeric array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are encoded using a one-hot (aka 'one-of-K' or 'dummy')\nencoding scheme. This creates a binary column for each category and\nreturns a sparse matrix or dense array (depending on the ``sparse``\nparameter)\n\nBy default, the encoder derives the categories based on the unique values\nin each feature. Alternatively, you can also specify the `categories`\nmanually.\n\nThis encoding is needed for feeding categorical data to many scikit-learn\nestimators, notably linear models and SVMs with the standard kernels.\n\nNote: a one-hot encoding of y labels should use a LabelBinarizer\ninstead.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: 0.20\n\nParameters\n----------\ncategories : 'auto' or a list of array-like, default='auto'\n Categories (unique values) per feature:\n\n - 'auto' : Determine categories automatically from the training data.\n - list : ``categories[i]`` holds the categories expected in the ith\n column. 
The passed categories should not mix strings and numeric\n values within a single feature, and should be sorted in case of\n numeric values.\n\n The used categories can be found in the ``categories_`` attribute.\n\n .. versionadded:: 0.20\n\ndrop : {'first', 'if_binary'} or a array-like of shape (n_features,), default=None\n Specifies a methodology to use to drop one of the categories per\n feature. This is useful in situations where perfectly collinear\n features cause problems, such as when feeding the resulting data\n into a neural network or an unregularized regression.\n\n However, dropping one category breaks the symmetry of the original\n representation and can therefore induce a bias in downstream models,\n for instance for penalized linear classification or regression models.\n\n - None : retain all features (the default).\n - 'first' : drop the first category in each feature. If only one\n category is present, the feature will be dropped entirely.\n - 'if_binary' : drop the first category in each feature with two\n categories. Features with 1 or more than 2 categories are\n left intact.\n - array : ``drop[i]`` is the category in feature ``X[:, i]`` that\n should be dropped.\n\n .. versionchanged:: 0.23\n Added option 'if_binary'.\n\nsparse : bool, default=True\n Will return sparse matrix if set True else will return an array.\n\ndtype : number type, default=float\n Desired dtype of output.\n\nhandle_unknown : {'error', 'ignore'}, default='error'\n Whether to raise an error or ignore if an unknown categorical feature\n is present during transform (default is to raise). When this parameter\n is set to 'ignore' and an unknown category is encountered during\n transform, the resulting one-hot encoded columns for this feature\n will be all zeros. 
In the inverse transform, an unknown category\n will be denoted as None.\n\nAttributes\n----------\ncategories_ : list of arrays\n The categories of each feature determined during fitting\n (in order of the features in X and corresponding with the output\n of ``transform``). This includes the category specified in ``drop``\n (if any).\n\ndrop_idx_ : array of shape (n_features,)\n - ``drop_idx_[i]`` is\u00a0the index in ``categories_[i]`` of the category\n to be dropped for each feature.\n - ``drop_idx_[i] = None`` if no category is to be dropped from the\n feature with index ``i``, e.g. when `drop='if_binary'` and the\n feature isn't binary.\n - ``drop_idx_ = None`` if all the transformed features will be\n retained.\n\n .. versionchanged:: 0.23\n Added the possibility to contain `None` values.\n\nSee Also\n--------\nOrdinalEncoder : Performs an ordinal (integer)\n encoding of the categorical features.\nsklearn.feature_extraction.DictVectorizer : Performs a one-hot encoding of\n dictionary items (also handles string-valued features).\nsklearn.feature_extraction.FeatureHasher : Performs an approximate one-hot\n encoding of dictionary items or strings.\nLabelBinarizer : Binarizes labels in a one-vs-all\n fashion.\nMultiLabelBinarizer : Transforms between iterable of\n iterables and a multilabel format, e.g. 
a (samples x classes) binary\n matrix indicating the presence of a class label.\n\nExamples\n--------\nGiven a dataset with two features, we let the encoder find the unique\nvalues per feature and transform the data to a binary one-hot encoding.\n\n>>> from sklearn.preprocessing import OneHotEncoder\n\nOne can discard categories not seen during `fit`:\n\n>>> enc = OneHotEncoder(handle_unknown='ignore')\n>>> X = [['Male', 1], ['Female', 3], ['Female', 2]]\n>>> enc.fit(X)\nOneHotEncoder(handle_unknown='ignore')\n>>> enc.categories_\n[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n>>> enc.transform([['Female', 1], ['Male', 4]]).toarray()\narray([[1., 0., 1., 0., 0.],\n [0., 1., 0., 0., 0.]])\n>>> enc.inverse_transform([[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]])\narray([['Male', 1],\n [None, 2]], dtype=object)\n>>> enc.get_feature_names(['gender', 'group'])\narray(['gender_Female', 'gender_Male', 'group_1', 'group_2', 'group_3'],\n dtype=object)\n\nOne can always drop the first column for each feature:\n\n>>> drop_enc = OneHotEncoder(drop='first').fit(X)\n>>> drop_enc.categories_\n[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n>>> drop_enc.transform([['Female', 1], ['Male', 2]]).toarray()\narray([[0., 0., 0.],\n [1., 1., 0.]])\n\nOr drop a column for feature only having 2 categories:\n\n>>> drop_binary_enc = OneHotEncoder(drop='if_binary').fit(X)\n>>> drop_binary_enc.transform([['Female', 1], ['Male', 2]]).toarray()\narray([[0., 1., 0., 0.],\n [1., 0., 1., 0.]])" - }, - { - "name": "OrdinalEncoder", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "categories", - "type": "Literal['auto']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Categories (unique values) per feature: - 'auto' : Determine categories automatically from the training data. 
- list : ``categories[i]`` holds the categories expected in the ith column. The passed categories should not mix strings and numeric values, and should be sorted in case of numeric values. The used categories can be found in the ``categories_`` attribute." - }, - { - "name": "dtype", - "type": null, - "hasDefault": true, - "default": "np", - "limitation": null, - "ignored": false, - "docstring": "Desired dtype of output." - }, - { - "name": "handle_unknown", - "type": "Literal['error', 'use_encoded_value']", - "hasDefault": true, - "default": "'error'", - "limitation": null, - "ignored": false, - "docstring": "When set to 'error' an error will be raised in case an unknown categorical feature is present during transform. When set to 'use_encoded_value', the encoded value of unknown categories will be set to the value given for the parameter `unknown_value`. In :meth:`inverse_transform`, an unknown category will be denoted as None. .. versionadded:: 0.24" - }, - { - "name": "unknown_value", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "When the parameter handle_unknown is set to 'use_encoded_value', this parameter is required and will set the encoded value of unknown categories. It has to be distinct from the values used to encode any of the categories in `fit`. If set to np.nan, the `dtype` parameter must be a float dtype. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to determine the categories of each feature." - }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Ignored. 
This parameter exists only for compatibility with :class:`~sklearn.pipeline.Pipeline`." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the OrdinalEncoder to X.\n\nParameters\n----------\nX : array-like, shape [n_samples, n_features]\n The data to determine the categories of each feature.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nself" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data to encode." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transform X to ordinal codes.\n\nParameters\n----------\nX : array-like, shape [n_samples, n_features]\n The data to encode.\n\nReturns\n-------\nX_out : sparse matrix or a 2-d array\n Transformed input." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The transformed data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convert the data back to the original representation.\n\nParameters\n----------\nX : array-like or sparse matrix, shape [n_samples, n_encoded_features]\n The transformed data.\n\nReturns\n-------\nX_tr : array-like, shape [n_samples, n_features]\n Inverse transformed array." - } - ], - "docstring": "Encode categorical features as an integer array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are converted to ordinal integers. This results in\na single column of integers (0 to n_categories - 1) per feature.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.20\n\nParameters\n----------\ncategories : 'auto' or a list of array-like, default='auto'\n Categories (unique values) per feature:\n\n - 'auto' : Determine categories automatically from the training data.\n - list : ``categories[i]`` holds the categories expected in the ith\n column. The passed categories should not mix strings and numeric\n values, and should be sorted in case of numeric values.\n\n The used categories can be found in the ``categories_`` attribute.\n\ndtype : number type, default np.float64\n Desired dtype of output.\n\nhandle_unknown : {'error', 'use_encoded_value'}, default='error'\n When set to 'error' an error will be raised in case an unknown\n categorical feature is present during transform. When set to\n 'use_encoded_value', the encoded value of unknown categories will be\n set to the value given for the parameter `unknown_value`. In\n :meth:`inverse_transform`, an unknown category will be denoted as None.\n\n .. versionadded:: 0.24\n\nunknown_value : int or np.nan, default=None\n When the parameter handle_unknown is set to 'use_encoded_value', this\n parameter is required and will set the encoded value of unknown\n categories. It has to be distinct from the values used to encode any of\n the categories in `fit`. If set to np.nan, the `dtype` parameter must\n be a float dtype.\n\n .. 
versionadded:: 0.24\n\nAttributes\n----------\ncategories_ : list of arrays\n The categories of each feature determined during ``fit`` (in order of\n the features in X and corresponding with the output of ``transform``).\n This does not include categories that weren't seen during ``fit``.\n\nSee Also\n--------\nOneHotEncoder : Performs a one-hot encoding of categorical features.\nLabelEncoder : Encodes target labels with values between 0 and\n ``n_classes-1``.\n\nExamples\n--------\nGiven a dataset with two features, we let the encoder find the unique\nvalues per feature and transform the data to an ordinal encoding.\n\n>>> from sklearn.preprocessing import OrdinalEncoder\n>>> enc = OrdinalEncoder()\n>>> X = [['Male', 1], ['Female', 3], ['Female', 2]]\n>>> enc.fit(X)\nOrdinalEncoder()\n>>> enc.categories_\n[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n>>> enc.transform([['Female', 3], ['Male', 1]])\narray([[0., 2.],\n [1., 0.]])\n\n>>> enc.inverse_transform([[1, 0], [0, 1]])\narray([['Male', 1],\n ['Female', 2]], dtype=object)" - } - ], - "functions": [] - }, - { - "name": "sklearn.preprocessing._function_transformer", - "imports": [ - "import warnings", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils.validation import _allclose_dense_sparse", - "from utils.validation import _deprecate_positional_args" - ], - "classes": [ - { - "name": "FunctionTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "func", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The callable to use for the transformation. This will be passed the same arguments as transform, with args and kwargs forwarded. If func is None, then func will be the identity function." 
- }, - { - "name": "inverse_func", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The callable to use for the inverse transformation. This will be passed the same arguments as inverse transform, with args and kwargs forwarded. If inverse_func is None, then inverse_func will be the identity function." - }, - { - "name": "validate", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Indicate that the input X array should be checked before calling ``func``. The possibilities are: - If False, there is no input validation. - If True, then X will be converted to a 2-dimensional NumPy array or sparse matrix. If the conversion is not possible an exception is raised. .. versionchanged:: 0.22 The default of ``validate`` changed from True to False." - }, - { - "name": "accept_sparse", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Indicate that func accepts a sparse matrix as input. If validate is False, this has no effect. Otherwise, if accept_sparse is false, sparse matrix inputs will cause an exception to be raised." - }, - { - "name": "check_inverse", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to check that or ``func`` followed by ``inverse_func`` leads to the original inputs. It can be used for a sanity check, raising a warning when the condition is not fulfilled. .. versionadded:: 0.20" - }, - { - "name": "kw_args", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary of additional keyword arguments to pass to func. .. 
versionadded:: 0.18" - }, - { - "name": "inv_kw_args", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary of additional keyword arguments to pass to inverse_func. .. versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that func and inverse_func are the inverse." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input array." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit transformer by checking X.\n\nIf ``validate`` is ``True``, ``X`` will be checked.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Input array.\n\nReturns\n-------\nself" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input array." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X using the forward function.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Input array.\n\nReturns\n-------\nX_out : array-like, shape (n_samples, n_features)\n Transformed input." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input array." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform X using the inverse function.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Input array.\n\nReturns\n-------\nX_out : array-like, shape (n_samples, n_features)\n Transformed input." - }, - { - "name": "_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Constructs a transformer from an arbitrary callable.\n\nA FunctionTransformer forwards its X (and optionally y) arguments to a\nuser-defined function or function object and returns the result of this\nfunction. This is useful for stateless transformations such as taking the\nlog of frequencies, doing custom scaling, etc.\n\nNote: If a lambda is used as the function, then the resulting\ntransformer will not be pickleable.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfunc : callable, default=None\n The callable to use for the transformation. This will be passed\n the same arguments as transform, with args and kwargs forwarded.\n If func is None, then func will be the identity function.\n\ninverse_func : callable, default=None\n The callable to use for the inverse transformation. This will be\n passed the same arguments as inverse transform, with args and\n kwargs forwarded. If inverse_func is None, then inverse_func\n will be the identity function.\n\nvalidate : bool, default=False\n Indicate that the input X array should be checked before calling\n ``func``. The possibilities are:\n\n - If False, there is no input validation.\n - If True, then X will be converted to a 2-dimensional NumPy array or\n sparse matrix. If the conversion is not possible an exception is\n raised.\n\n .. 
versionchanged:: 0.22\n The default of ``validate`` changed from True to False.\n\naccept_sparse : bool, default=False\n Indicate that func accepts a sparse matrix as input. If validate is\n False, this has no effect. Otherwise, if accept_sparse is false,\n sparse matrix inputs will cause an exception to be raised.\n\ncheck_inverse : bool, default=True\n Whether to check that or ``func`` followed by ``inverse_func`` leads to\n the original inputs. It can be used for a sanity check, raising a\n warning when the condition is not fulfilled.\n\n .. versionadded:: 0.20\n\nkw_args : dict, default=None\n Dictionary of additional keyword arguments to pass to func.\n\n .. versionadded:: 0.18\n\ninv_kw_args : dict, default=None\n Dictionary of additional keyword arguments to pass to inverse_func.\n\n .. versionadded:: 0.18\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import FunctionTransformer\n>>> transformer = FunctionTransformer(np.log1p)\n>>> X = np.array([[0, 1], [2, 3]])\n>>> transformer.transform(X)\narray([[0. 
, 0.6931...],\n [1.0986..., 1.3862...]])" - } - ], - "functions": [ - { - "name": "_identity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "The identity function.\n " - } - ] - }, - { - "name": "sklearn.preprocessing._label", - "imports": [ - "from collections import defaultdict", - "import itertools", - "import array", - "import warnings", - "import numpy as np", - "import scipy.sparse as sp", - "from base import BaseEstimator", - "from base import TransformerMixin", - "from utils.sparsefuncs import min_max_axis", - "from utils import column_or_1d", - "from utils.validation import check_array", - "from utils.validation import check_is_fitted", - "from utils.validation import _num_samples", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import unique_labels", - "from utils.multiclass import type_of_target", - "from utils._encode import _encode", - "from utils._encode import _unique" - ], - "classes": [ - { - "name": "LabelEncoder", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit label encoder.\n\nParameters\n----------\ny : array-like of shape (n_samples,)\n Target values.\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit label encoder and return encoded labels.\n\nParameters\n----------\ny : array-like of shape (n_samples,)\n Target values.\n\nReturns\n-------\ny : array-like of shape (n_samples,)" - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform labels to normalized encoding.\n\nParameters\n----------\ny : array-like of shape (n_samples,)\n Target values.\n\nReturns\n-------\ny : array-like of shape (n_samples,)" - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform labels back to original encoding.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n Target values.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Encode target labels with value between 0 and n_classes-1.\n\nThis transformer should be used to encode target values, *i.e.* `y`, and\nnot the input `X`.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.12\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n Holds the label for each class.\n\nExamples\n--------\n`LabelEncoder` can be used to normalize labels.\n\n>>> from sklearn import preprocessing\n>>> le = preprocessing.LabelEncoder()\n>>> le.fit([1, 2, 2, 6])\nLabelEncoder()\n>>> le.classes_\narray([1, 2, 6])\n>>> le.transform([1, 1, 2, 6])\narray([0, 0, 1, 2]...)\n>>> le.inverse_transform([0, 0, 1, 2])\narray([1, 1, 2, 6])\n\nIt can also be used to transform non-numerical labels (as long as they are\nhashable and comparable) to numerical labels.\n\n>>> le = preprocessing.LabelEncoder()\n>>> le.fit([\"paris\", \"paris\", \"tokyo\", \"amsterdam\"])\nLabelEncoder()\n>>> list(le.classes_)\n['amsterdam', 'paris', 'tokyo']\n>>> le.transform([\"tokyo\", \"tokyo\", \"paris\"])\narray([2, 2, 1]...)\n>>> list(le.inverse_transform([2, 2, 1]))\n['tokyo', 'tokyo', 'paris']\n\nSee Also\n--------\nOrdinalEncoder : Encode categorical features using an ordinal encoding\n scheme.\nOneHotEncoder : Encode categorical features as a one-hot numeric array." - }, - { - "name": "LabelBinarizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "neg_label", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Value with which negative labels must be encoded." - }, - { - "name": "pos_label", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Value with which positive labels must be encoded." - }, - { - "name": "sparse_output", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "True if the returned array from transform is desired to be in sparse CSR format." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. The 2-d matrix should only contain 0 and 1, represents multilabel classification." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit label binarizer.\n\nParameters\n----------\ny : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Target values. The 2-d matrix should only contain 0 and 1,\n represents multilabel classification.\n\nReturns\n-------\nself : returns an instance of self." - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. The 2-d matrix should only contain 0 and 1, represents multilabel classification. Sparse matrix can be CSR, CSC, COO, DOK, or LIL." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit label binarizer and transform multi-class labels to binary\nlabels.\n\nThe output of transform is sometimes referred to as\nthe 1-of-K coding scheme.\n\nParameters\n----------\ny : {ndarray, sparse matrix} of shape (n_samples,) or (n_samples, n_classes)\n Target values. The 2-d matrix should only contain 0 and 1,\n represents multilabel classification. Sparse matrix can be\n CSR, CSC, COO, DOK, or LIL.\n\nReturns\n-------\nY : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Shape will be (n_samples, 1) for binary problems. Sparse matrix\n will be of CSR format." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. 
The 2-d matrix should only contain 0 and 1, represents multilabel classification. Sparse matrix can be CSR, CSC, COO, DOK, or LIL." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform multi-class labels to binary labels.\n\nThe output of transform is sometimes referred to by some authors as\nthe 1-of-K coding scheme.\n\nParameters\n----------\ny : {array, sparse matrix} of shape (n_samples,) or (n_samples, n_classes)\n Target values. The 2-d matrix should only contain 0 and 1,\n represents multilabel classification. Sparse matrix can be\n CSR, CSC, COO, DOK, or LIL.\n\nReturns\n-------\nY : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Shape will be (n_samples, 1) for binary problems. Sparse matrix\n will be of CSR format." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "Y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values. All sparse matrices are converted to CSR before inverse transformation." - }, - { - "name": "threshold", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold used in the binary and multi-label cases. Use 0 when ``Y`` contains the output of decision_function (classifier). Use 0.5 when ``Y`` contains the output of predict_proba. If None, the threshold is assumed to be half way between neg_label and pos_label." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform binary labels back to multi-class labels.\n\nParameters\n----------\nY : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Target values. 
All sparse matrices are converted to CSR before\n inverse transformation.\n\nthreshold : float, default=None\n Threshold used in the binary and multi-label cases.\n\n Use 0 when ``Y`` contains the output of decision_function\n (classifier).\n Use 0.5 when ``Y`` contains the output of predict_proba.\n\n If None, the threshold is assumed to be half way between\n neg_label and pos_label.\n\nReturns\n-------\ny : {ndarray, sparse matrix} of shape (n_samples,)\n Target values. Sparse matrix will be of CSR format.\n\nNotes\n-----\nIn the case when the binary labels are fractional\n(probabilistic), inverse_transform chooses the class with the\ngreatest value. Typically, this allows to use the output of a\nlinear model's decision_function method directly as the input\nof inverse_transform." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Binarize labels in a one-vs-all fashion.\n\nSeveral regression and binary classification algorithms are\navailable in scikit-learn. A simple way to extend these algorithms\nto the multi-class classification case is to use the so-called\none-vs-all scheme.\n\nAt learning time, this simply consists in learning one regressor\nor binary classifier per class. In doing so, one needs to convert\nmulti-class labels to binary labels (belong or does not belong\nto the class). LabelBinarizer makes this process easy with the\ntransform method.\n\nAt prediction time, one assigns the class for which the corresponding\nmodel gave the greatest confidence. 
LabelBinarizer makes this easy\nwith the inverse_transform method.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nneg_label : int, default=0\n Value with which negative labels must be encoded.\n\npos_label : int, default=1\n Value with which positive labels must be encoded.\n\nsparse_output : bool, default=False\n True if the returned array from transform is desired to be in sparse\n CSR format.\n\nAttributes\n----------\n\nclasses_ : ndarray of shape (n_classes,)\n Holds the label for each class.\n\ny_type_ : str\n Represents the type of the target data as evaluated by\n utils.multiclass.type_of_target. Possible type are 'continuous',\n 'continuous-multioutput', 'binary', 'multiclass',\n 'multiclass-multioutput', 'multilabel-indicator', and 'unknown'.\n\nsparse_input_ : bool\n True if the input data to transform is given as a sparse matrix, False\n otherwise.\n\nExamples\n--------\n>>> from sklearn import preprocessing\n>>> lb = preprocessing.LabelBinarizer()\n>>> lb.fit([1, 2, 6, 4, 2])\nLabelBinarizer()\n>>> lb.classes_\narray([1, 2, 4, 6])\n>>> lb.transform([1, 6])\narray([[1, 0, 0, 0],\n [0, 0, 0, 1]])\n\nBinary targets transform to a column vector\n\n>>> lb = preprocessing.LabelBinarizer()\n>>> lb.fit_transform(['yes', 'no', 'no', 'yes'])\narray([[1],\n [0],\n [0],\n [1]])\n\nPassing a 2D matrix for multilabel classification\n\n>>> import numpy as np\n>>> lb.fit(np.array([[0, 1, 1], [1, 0, 0]]))\nLabelBinarizer()\n>>> lb.classes_\narray([0, 1, 2])\n>>> lb.transform([0, 1, 2, 1])\narray([[1, 0, 0],\n [0, 1, 0],\n [0, 0, 1],\n [0, 1, 0]])\n\nSee Also\n--------\nlabel_binarize : Function to perform the transform operation of\n LabelBinarizer with fixed classes.\nOneHotEncoder : Encode categorical features using a one-hot aka one-of-K\n scheme." 
- }, - { - "name": "MultiLabelBinarizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "classes", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indicates an ordering for the class labels. All entries should be unique (cannot contain duplicate classes)." - }, - { - "name": "sparse_output", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Set to True if output binary array is desired in CSR sparse format." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A set of labels (any orderable and hashable object) for each sample. If the `classes` parameter is set, `y` will not be iterated." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the label sets binarizer, storing :term:`classes_`.\n\nParameters\n----------\ny : iterable of iterables\n A set of labels (any orderable and hashable object) for each\n sample. If the `classes` parameter is set, `y` will not be\n iterated.\n\nReturns\n-------\nself : returns this MultiLabelBinarizer instance" - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A set of labels (any orderable and hashable object) for each sample. If the `classes` parameter is set, `y` will not be iterated." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fit the label sets binarizer and transform the given label sets.\n\nParameters\n----------\ny : iterable of iterables\n A set of labels (any orderable and hashable object) for each\n sample. If the `classes` parameter is set, `y` will not be\n iterated.\n\nReturns\n-------\ny_indicator : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n A matrix such that `y_indicator[i, j] = 1` i.f.f. `classes_[j]`\n is in `y[i]`, and 0 otherwise. Sparse matrix will be of CSR\n format." - }, - { - "name": "transform", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A set of labels (any orderable and hashable object) for each sample. If the `classes` parameter is set, `y` will not be iterated." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform the given label sets.\n\nParameters\n----------\ny : iterable of iterables\n A set of labels (any orderable and hashable object) for each\n sample. If the `classes` parameter is set, `y` will not be\n iterated.\n\nReturns\n-------\ny_indicator : array or CSR matrix, shape (n_samples, n_classes)\n A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]` is in\n `y[i]`, and 0 otherwise." - }, - { - "name": "_build_cache", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_transform", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "class_mapping", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Maps from label to column index in label indicator matrix." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Transforms the label sets with a given mapping\n\nParameters\n----------\ny : iterable of iterables\nclass_mapping : Mapping\n Maps from label to column index in label indicator matrix.\n\nReturns\n-------\ny_indicator : sparse matrix of shape (n_samples, n_classes)\n Label indicator matrix. Will be of CSR format." - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [ - { - "name": "yt", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A matrix containing only 1s ands 0s." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Transform the given indicator matrix into label sets.\n\nParameters\n----------\nyt : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n A matrix containing only 1s ands 0s.\n\nReturns\n-------\ny : list of tuples\n The set of labels for each sample such that `y[i]` consists of\n `classes_[j]` for each `yt[i, j] == 1`." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transform between iterable of iterables and a multilabel format.\n\nAlthough a list of sets or tuples is a very intuitive format for multilabel\ndata, it is unwieldy to process. 
This transformer converts between this\nintuitive format and the supported multilabel format: a (samples x classes)\nbinary matrix indicating the presence of a class label.\n\nParameters\n----------\nclasses : array-like of shape (n_classes,), default=None\n Indicates an ordering for the class labels.\n All entries should be unique (cannot contain duplicate classes).\n\nsparse_output : bool, default=False\n Set to True if output binary array is desired in CSR sparse format.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n A copy of the `classes` parameter when provided.\n Otherwise it corresponds to the sorted set of classes found\n when fitting.\n\nExamples\n--------\n>>> from sklearn.preprocessing import MultiLabelBinarizer\n>>> mlb = MultiLabelBinarizer()\n>>> mlb.fit_transform([(1, 2), (3,)])\narray([[1, 1, 0],\n [0, 0, 1]])\n>>> mlb.classes_\narray([1, 2, 3])\n\n>>> mlb.fit_transform([{'sci-fi', 'thriller'}, {'comedy'}])\narray([[0, 1, 1],\n [1, 0, 0]])\n>>> list(mlb.classes_)\n['comedy', 'sci-fi', 'thriller']\n\nA common mistake is to pass in a list, which leads to the following issue:\n\n>>> mlb = MultiLabelBinarizer()\n>>> mlb.fit(['sci-fi', 'thriller', 'comedy'])\nMultiLabelBinarizer()\n>>> mlb.classes_\narray(['-', 'c', 'd', 'e', 'f', 'h', 'i', 'l', 'm', 'o', 'r', 's', 't',\n 'y'], dtype=object)\n\nTo correct this, the list of labels should be passed in as:\n\n>>> mlb = MultiLabelBinarizer()\n>>> mlb.fit([['sci-fi', 'thriller', 'comedy']])\nMultiLabelBinarizer()\n>>> mlb.classes_\narray(['comedy', 'sci-fi', 'thriller'], dtype=object)\n\nSee Also\n--------\nOneHotEncoder : Encode categorical features using a one-hot aka one-of-K\n scheme." - } - ], - "functions": [ - { - "name": "label_binarize", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sequence of integer labels or multilabel data to encode." 
- }, - { - "name": "classes", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Uniquely holds the label for each class." - }, - { - "name": "neg_label", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Value with which negative labels must be encoded." - }, - { - "name": "pos_label", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Value with which positive labels must be encoded." - }, - { - "name": "sparse_output", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Set to true if output binary array is desired in CSR sparse format." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Binarize labels in a one-vs-all fashion.\n\nSeveral regression and binary classification algorithms are\navailable in scikit-learn. A simple way to extend these algorithms\nto the multi-class classification case is to use the so-called\none-vs-all scheme.\n\nThis function makes it possible to compute this transformation for a\nfixed set of class labels known ahead of time.\n\nParameters\n----------\ny : array-like\n Sequence of integer labels or multilabel data to encode.\n\nclasses : array-like of shape (n_classes,)\n Uniquely holds the label for each class.\n\nneg_label : int, default=0\n Value with which negative labels must be encoded.\n\npos_label : int, default=1\n Value with which positive labels must be encoded.\n\nsparse_output : bool, default=False,\n Set to true if output binary array is desired in CSR sparse format.\n\nReturns\n-------\nY : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Shape will be (n_samples, 1) for binary problems. 
Sparse matrix will\n be of CSR format.\n\nExamples\n--------\n>>> from sklearn.preprocessing import label_binarize\n>>> label_binarize([1, 6], classes=[1, 2, 4, 6])\narray([[1, 0, 0, 0],\n [0, 0, 0, 1]])\n\nThe class ordering is preserved:\n\n>>> label_binarize([1, 6], classes=[1, 6, 4, 2])\narray([[1, 0, 0, 0],\n [0, 1, 0, 0]])\n\nBinary targets transform to a column vector\n\n>>> label_binarize(['yes', 'no', 'no', 'yes'], classes=['no', 'yes'])\narray([[1],\n [0],\n [0],\n [1]])\n\nSee Also\n--------\nLabelBinarizer : Class used to wrap the functionality of label_binarize and\n allow for fitting to classes independently of the transform operation." - }, - { - "name": "_inverse_binarize_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inverse label binarization transformation for multiclass.\n\nMulticlass uses the maximal score instead of a threshold." - }, - { - "name": "_inverse_binarize_thresholding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inverse label binarization transformation using thresholding." 
- } - ] - }, - { - "name": "sklearn.preprocessing", - "imports": [ - "from _function_transformer import FunctionTransformer", - "from _data import Binarizer", - "from _data import KernelCenterer", - "from _data import MinMaxScaler", - "from _data import MaxAbsScaler", - "from _data import Normalizer", - "from _data import RobustScaler", - "from _data import StandardScaler", - "from _data import QuantileTransformer", - "from _data import add_dummy_feature", - "from _data import binarize", - "from _data import normalize", - "from _data import scale", - "from _data import robust_scale", - "from _data import maxabs_scale", - "from _data import minmax_scale", - "from _data import quantile_transform", - "from _data import power_transform", - "from _data import PowerTransformer", - "from _data import PolynomialFeatures", - "from _encoders import OneHotEncoder", - "from _encoders import OrdinalEncoder", - "from _label import label_binarize", - "from _label import LabelBinarizer", - "from _label import LabelEncoder", - "from _label import MultiLabelBinarizer", - "from _discretization import KBinsDiscretizer" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.preprocessing.tests.test_common", - "imports": [ - "import warnings", - "import pytest", - "import numpy as np", - "from scipy import sparse", - "from sklearn.datasets import load_iris", - "from sklearn.model_selection import train_test_split", - "from sklearn.base import clone", - "from sklearn.preprocessing import maxabs_scale", - "from sklearn.preprocessing import minmax_scale", - "from sklearn.preprocessing import scale", - "from sklearn.preprocessing import power_transform", - "from sklearn.preprocessing import quantile_transform", - "from sklearn.preprocessing import robust_scale", - "from sklearn.preprocessing import MaxAbsScaler", - "from sklearn.preprocessing import MinMaxScaler", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.preprocessing import PowerTransformer", - 
"from sklearn.preprocessing import QuantileTransformer", - "from sklearn.preprocessing import RobustScaler", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose" - ], - "classes": [], - "functions": [ - { - "name": "_get_valid_samples_by_column", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get non NaN samples in column of X" - }, - { - "name": "test_missing_value_handling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_missing_value_pandas_na_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.preprocessing.tests.test_data", - "imports": [ - "import warnings", - "import itertools", - "import numpy as np", - "import numpy.linalg as la", - "from scipy import sparse", - "from scipy import stats", - "from scipy.sparse import random as sparse_random", - "import pytest", - "from sklearn.utils import gen_batches", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_less", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import skip_if_32bit", - "from sklearn.utils._testing import _convert_container", - "from sklearn.utils.sparsefuncs import mean_variance_axis", - "from sklearn.preprocessing._data import _handle_zeros_in_scale", - "from sklearn.preprocessing._data import Binarizer", - "from sklearn.preprocessing._data import KernelCenterer", - "from 
sklearn.preprocessing._data import Normalizer", - "from sklearn.preprocessing._data import normalize", - "from sklearn.preprocessing._data import StandardScaler", - "from sklearn.preprocessing._data import scale", - "from sklearn.preprocessing._data import MinMaxScaler", - "from sklearn.preprocessing._data import minmax_scale", - "from sklearn.preprocessing._data import QuantileTransformer", - "from sklearn.preprocessing._data import quantile_transform", - "from sklearn.preprocessing._data import MaxAbsScaler", - "from sklearn.preprocessing._data import maxabs_scale", - "from sklearn.preprocessing._data import RobustScaler", - "from sklearn.preprocessing._data import robust_scale", - "from sklearn.preprocessing._data import add_dummy_feature", - "from sklearn.preprocessing._data import PolynomialFeatures", - "from sklearn.preprocessing._data import PowerTransformer", - "from sklearn.preprocessing._data import power_transform", - "from sklearn.preprocessing._data import BOUNDS_THRESHOLD", - "from sklearn.exceptions import NotFittedError", - "from sklearn.base import clone", - "from sklearn.pipeline import Pipeline", - "from sklearn.model_selection import cross_val_predict", - "from sklearn.svm import SVR", - "from sklearn.utils import shuffle", - "from sklearn import datasets" - ], - "classes": [], - "functions": [ - { - "name": "toarray", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_dim_1axis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_correct_incr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_feature_names", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_feature_array_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_features_csc_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_features_csr_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_features_csr_X_floats", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_features_csr_X_zero_row", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_features_csr_X_degree_4", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_features_csr_X_dim_edges", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises_value_error_if_sample_weights_greater_than_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_standard_scaler_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_standard_scaler_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_standard_scaler_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scale_1d", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_standard_scaler_numerical_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaler_2d_arrays", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaler_float16_overflow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_handle_zeros_in_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minmax_scaler_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_standard_scaler_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_standard_scaler_partial_fit_numerical_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_partial_fit_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_standard_scaler_trasform_with_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_max_scaler_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_max_scaler_zero_variance_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minmax_scale_axis1", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_max_scaler_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaler_without_centering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaler_n_samples_seen_with_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_identity_scalers_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaler_return_identity", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaler_int", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scaler_without_copy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scale_sparse_with_mean_raise_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scale_input_finiteness_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_error_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_col_zero_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_robust_scaler_2d_arrays", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_equivalence_dense_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_transform_one_row_csr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_iris_quantiles", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_check_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_sparse_ignore_zeros", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_dense_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_subsampling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_sparse_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_axis1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_quantile_transform_bounds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_and_inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transformer_sorted_quantiles", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_invalid_range", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scale_function_without_centering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scale_axis1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scale_1d_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_zero_variance_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_robust_scaler_unit_variance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_maxabs_scaler_zero_variance_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_maxabs_scaler_large_negative_value", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_maxabs_scaler_transform_one_row_csr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_maxabs_scaler_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_maxabs_scaler_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normalizer_l1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normalizer_l2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normalizer_max", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normalizer_max_sign", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_binarizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_center_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cv_pipeline_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", 
- "docstring": null - }, - { - "name": "test_add_dummy_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_add_dummy_feature_coo", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_add_dummy_feature_csc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_add_dummy_feature_csr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_cold_start", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_transform_valid_axis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_notfitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_boxcox_strictly_positive_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_yeojohnson_any_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_power_transformer_shape_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_method_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_lambda_zero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_lambda_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_optimization_power_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_yeo_johnson_darwin_example", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_nans", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_copy_True", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_power_transformer_copy_False", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_standard_scaler_sparse_partial_fit_finite_variance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minmax_scaler_clip", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - 
"name": "sklearn.preprocessing.tests.test_discretization", - "imports": [ - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "import warnings", - "from sklearn.preprocessing import KBinsDiscretizer", - "from sklearn.preprocessing import OneHotEncoder", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_allclose_dense_sparse" - ], - "classes": [], - "functions": [ - { - "name": "test_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_valid_n_bins", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_n_bins", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_n_bins_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_transform_n_bins_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_same_min_max", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_1d_behavior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_numeric_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_encode_option", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_encode_options", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_strategy_option", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nonuniform_strategies", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_transform_outside_fit_range", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_overwrite", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_redundant_bins", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_percentile_numeric_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_consistent_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_32_equal_64", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.preprocessing.tests.test_encoders", - "imports": [ - "import re", - "import numpy as np", - "from scipy import sparse", - "import pytest", - "from sklearn.exceptions import NotFittedError", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import _convert_container", - "from sklearn.utils import is_scalar_nan", - "from sklearn.preprocessing import 
OneHotEncoder", - "from sklearn.preprocessing import OrdinalEncoder" - ], - "classes": [], - "functions": [ - { - "name": "test_one_hot_encoder_sparse_dense", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_diff_n_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_handle_unknown", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_not_fitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_handle_unknown_strings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_dtype_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_feature_names_unicode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_categorical_onehot", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_inverse_transform_raise_error_with_unknown", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that `inverse_transform` raise an error with unknown samples, no\ndropped feature, and `handle_unknow=\"error`.\nNon-regression test for:\nhttps://github.com/scikit-learn/scikit-learn/issues/14934" - }, - { - "name": "test_one_hot_encoder_inverse_if_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_drop_reset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_X_is_not_1D", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_X_is_not_1D_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_specified_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_unsorted_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_specified_categories_mixed_columns", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_one_hot_encoder_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_feature_names_drop", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_drop_equals_if_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_specified_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_inverse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_raise_missing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_handle_unknowns_string", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_handle_unknowns_numeric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_handle_unknowns_raise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_handle_unknowns_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_handle_unknowns_nan_non_float_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "test_ordinal_encoder_raise_categories_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_encoder_dtypes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_encoder_dtypes_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_drop_manual", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_hot_encoder_invalid_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_drop_length", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_encoders_has_categorical_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_encoders_unicode_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that encoding work with string and object dtypes.\nNon-regression test for:\nhttps://github.com/scikit-learn/scikit-learn/issues/15616\nhttps://github.com/scikit-learn/scikit-learn/issues/15726" - }, - { - "name": "test_ohe_missing_values_get_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_ohe_missing_value_support_pandas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ohe_missing_value_support_pandas_categorical", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.preprocessing.tests.test_function_transformer", - "imports": [ - "import pytest", - "import numpy as np", - "from scipy import sparse", - "from sklearn.preprocessing import FunctionTransformer", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_no_warnings" - ], - "classes": [], - "functions": [ - { - "name": "_make_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_delegate_to_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_np_log", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kw_arg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kw_arg_update", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kw_arg_reset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_inverse", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_function_transformer_frame", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.preprocessing.tests.test_label", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy.sparse import issparse", - "from scipy.sparse import coo_matrix", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import dok_matrix", - "from scipy.sparse import lil_matrix", - "from sklearn.utils.multiclass import type_of_target", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils import _to_object_array", - "from sklearn.preprocessing._label import LabelBinarizer", - "from sklearn.preprocessing._label import MultiLabelBinarizer", - "from sklearn.preprocessing._label import LabelEncoder", - "from sklearn.preprocessing._label import label_binarize", - "from sklearn.preprocessing._label import _inverse_binarize_thresholding", - "from sklearn.preprocessing._label import _inverse_binarize_multiclass", - "from sklearn import datasets" - ], - "classes": [], - "functions": [ - { - "name": "toarray", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_binarizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_binarizer_unseen_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_binarizer_set_label_encoding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_label_binarizer_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_encoder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_encoder_negative_ints", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_encoder_str_bad_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_encoder_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_encoder_empty_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_output_multilabel_binarizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_binarizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_binarizer_empty_sample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_binarizer_unknown_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_binarizer_given_classes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_binarizer_multiple_calls", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_multilabel_binarizer_same_length_sequence", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_binarizer_non_integer_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_binarizer_non_unique", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multilabel_binarizer_inverse_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_binarize_with_class_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_binarized_results", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_binarize_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_binarize_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_binarize_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_input_label_binarize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inverse_binarize_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.preprocessing.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.semi_supervised._label_propagation", - 
"imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "import warnings", - "import numpy as np", - "from scipy import sparse", - "from scipy.sparse import csgraph", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from metrics.pairwise import rbf_kernel", - "from neighbors import NearestNeighbors", - "from utils.extmath import safe_sparse_dot", - "from utils.multiclass import check_classification_targets", - "from utils.validation import check_is_fitted", - "from utils.validation import check_array", - "from utils.validation import _deprecate_positional_args", - "from exceptions import ConvergenceWarning" - ], - "classes": [ - { - "name": "BaseLabelPropagation", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": "Literal['knn', 'rbf']", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": " String identifier for kernel function to use or the kernel function itself. Only 'rbf' and 'knn' strings are valid inputs. The function passed should take two inputs, each of shape (n_samples, n_features), and return a (n_samples, n_samples) shaped weight matrix. gamma : float, default=20 Parameter for rbf kernel. n_neighbors : int, default=7 Parameter for knn kernel. Need to be strictly positive. alpha : float, default=1.0 Clamping factor. max_iter : int, default=30 Change maximum number of iterations allowed. tol : float, default=1e-3 Convergence tolerance: threshold to consider the system at steady state." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_build_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Performs inductive inference across the model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n Predictions for input data." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict probability for each possible outcome.\n\nCompute the probability estimates for each single sample in X\nand each possible outcome seen during training (categorical\ndistribution).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\nprobabilities : ndarray of shape (n_samples, n_classes)\n Normalized probability distributions across\n class labels." - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A matrix of shape (n_samples, n_samples) will be created from this." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "`n_labeled_samples` (unlabeled points are marked as -1) All unlabeled samples will be transductively assigned labels." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit a semi-supervised label propagation model based\n\nAll the input data is provided matrix X (labeled and unlabeled)\nand corresponding label matrix y with a dedicated marker value for\nunlabeled samples.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n A matrix of shape (n_samples, n_samples) will be created from this.\n\ny : array-like of shape (n_samples,)\n `n_labeled_samples` (unlabeled points are marked as -1)\n All unlabeled samples will be transductively assigned labels.\n\nReturns\n-------\nself : object" - } - ], - "docstring": "Base class for label propagation module.\n\n Parameters\n ----------\n kernel : {'knn', 'rbf'} or callable, default='rbf'\n String identifier for kernel function to use or the kernel function\n itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n passed should take two inputs, each of shape (n_samples, n_features),\n and return a (n_samples, n_samples) shaped weight matrix.\n\n gamma : float, default=20\n Parameter for rbf kernel.\n\n n_neighbors : int, default=7\n Parameter for knn kernel. Need to be strictly positive.\n\n alpha : float, default=1.0\n Clamping factor.\n\n max_iter : int, default=30\n Change maximum number of iterations allowed.\n\n tol : float, default=1e-3\n Convergence tolerance: threshold to consider the system at steady\n state.\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n " - }, - { - "name": "LabelPropagation", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": "Literal['knn', 'rbf']", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "String identifier for kernel function to use or the kernel function itself. Only 'rbf' and 'knn' strings are valid inputs. The function passed should take two inputs, each of shape (n_samples, n_features), and return a (n_samples, n_samples) shaped weight matrix." - }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "20", - "limitation": null, - "ignored": false, - "docstring": "Parameter for rbf kernel." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "7", - "limitation": null, - "ignored": false, - "docstring": "Parameter for knn kernel which need to be strictly positive." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "Change maximum number of iterations allowed." - }, - { - "name": "tol", - "type": "Union[Literal[1e-3], float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Convergence tolerance: threshold to consider the system at steady state." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_build_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Matrix representing a fully connected graph between each sample\n\nThis basic implementation creates a non-stochastic affinity matrix, so\nclass distributions will exceed 1 (normalization may be desired)." - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Label Propagation classifier\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nkernel : {'knn', 'rbf'} or callable, default='rbf'\n String identifier for kernel function to use or the kernel function\n itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n passed should take two inputs, each of shape (n_samples, n_features),\n and return a (n_samples, n_samples) shaped weight matrix.\n\ngamma : float, default=20\n Parameter for rbf kernel.\n\nn_neighbors : int, default=7\n Parameter for knn kernel which need to be strictly positive.\n\nmax_iter : int, default=1000\n Change maximum number of iterations allowed.\n\ntol : float, 1e-3\n Convergence tolerance: threshold to consider the system at steady\n state.\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nX_ : ndarray of shape (n_samples, n_features)\n Input array.\n\nclasses_ : ndarray of shape (n_classes,)\n The distinct labels used in classifying instances.\n\nlabel_distributions_ : ndarray of shape (n_samples, n_classes)\n Categorical distribution for each item.\n\ntransduction_ : ndarray of shape (n_samples)\n Label assigned to each item via the transduction.\n\nn_iter_ : int\n Number of iterations run.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets\n>>> from sklearn.semi_supervised import LabelPropagation\n>>> label_prop_model = LabelPropagation()\n>>> iris = datasets.load_iris()\n>>> rng = np.random.RandomState(42)\n>>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3\n>>> labels = np.copy(iris.target)\n>>> labels[random_unlabeled_points] = -1\n>>> label_prop_model.fit(iris.data, labels)\nLabelPropagation(...)\n\nReferences\n----------\nXiaojin Zhu and Zoubin Ghahramani. Learning from labeled and unlabeled data\nwith label propagation. Technical Report CMU-CALD-02-107, Carnegie Mellon\nUniversity, 2002 http://pages.cs.wisc.edu/~jerryzhu/pub/CMU-CALD-02-107.pdf\n\nSee Also\n--------\nLabelSpreading : Alternate label propagation strategy more robust to noise." - }, - { - "name": "LabelSpreading", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": "Literal['knn', 'rbf']", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "String identifier for kernel function to use or the kernel function itself. Only 'rbf' and 'knn' strings are valid inputs. The function passed should take two inputs, each of shape (n_samples, n_features), and return a (n_samples, n_samples) shaped weight matrix." 
- }, - { - "name": "gamma", - "type": "float", - "hasDefault": true, - "default": "20", - "limitation": null, - "ignored": false, - "docstring": "Parameter for rbf kernel." - }, - { - "name": "n_neighbors", - "type": "int", - "hasDefault": true, - "default": "7", - "limitation": null, - "ignored": false, - "docstring": "Parameter for knn kernel which is a strictly positive integer." - }, - { - "name": "alpha", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Clamping factor. A value in (0, 1) that specifies the relative amount that an instance should adopt the information from its neighbors as opposed to its initial label. alpha=0 means keeping the initial label information; alpha=1 means replacing all initial information." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "30", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations allowed." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Convergence tolerance: threshold to consider the system at steady state." - }, - { - "name": "n_jobs", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_build_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Graph matrix for Label Spreading computes the graph laplacian" - } - ], - "docstring": "LabelSpreading model for semi-supervised learning\n\nThis model is similar to the basic Label Propagation algorithm,\nbut uses affinity matrix based on the normalized graph Laplacian\nand soft clamping across the labels.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nkernel : {'knn', 'rbf'} or callable, default='rbf'\n String identifier for kernel function to use or the kernel function\n itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n passed should take two inputs, each of shape (n_samples, n_features),\n and return a (n_samples, n_samples) shaped weight matrix.\n\ngamma : float, default=20\n Parameter for rbf kernel.\n\nn_neighbors : int, default=7\n Parameter for knn kernel which is a strictly positive integer.\n\nalpha : float, default=0.2\n Clamping factor. A value in (0, 1) that specifies the relative amount\n that an instance should adopt the information from its neighbors as\n opposed to its initial label.\n alpha=0 means keeping the initial label information; alpha=1 means\n replacing all initial information.\n\nmax_iter : int, default=30\n Maximum number of iterations allowed.\n\ntol : float, default=1e-3\n Convergence tolerance: threshold to consider the system at steady\n state.\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nX_ : ndarray of shape (n_samples, n_features)\n Input array.\n\nclasses_ : ndarray of shape (n_classes,)\n The distinct labels used in classifying instances.\n\nlabel_distributions_ : ndarray of shape (n_samples, n_classes)\n Categorical distribution for each item.\n\ntransduction_ : ndarray of shape (n_samples,)\n Label assigned to each item via the transduction.\n\nn_iter_ : int\n Number of iterations run.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets\n>>> from sklearn.semi_supervised import LabelSpreading\n>>> label_prop_model = LabelSpreading()\n>>> iris = datasets.load_iris()\n>>> rng = np.random.RandomState(42)\n>>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3\n>>> labels = np.copy(iris.target)\n>>> labels[random_unlabeled_points] = -1\n>>> label_prop_model.fit(iris.data, labels)\nLabelSpreading(...)\n\nReferences\n----------\nDengyong Zhou, Olivier Bousquet, Thomas Navin Lal, Jason Weston,\nBernhard Schoelkopf. Learning with local and global consistency (2004)\nhttp://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.115.3219\n\nSee Also\n--------\nLabelPropagation : Unregularized graph based semi-supervised learning." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.semi_supervised._self_training", - "imports": [ - "import warnings", - "import numpy as np", - "from base import MetaEstimatorMixin", - "from base import clone", - "from base import BaseEstimator", - "from utils.validation import check_is_fitted", - "from utils.metaestimators import if_delegate_has_method", - "from utils import safe_mask" - ], - "classes": [ - { - "name": "SelfTrainingClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "base_estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator object implementing ``fit`` and ``predict_proba``. Invoking the ``fit`` method will fit a clone of the passed estimator, which will be stored in the ``base_estimator_`` attribute." - }, - { - "name": "criterion", - "type": "Literal['threshold', 'k_best']", - "hasDefault": true, - "default": "'threshold'", - "limitation": null, - "ignored": false, - "docstring": "The selection criterion used to select which labels to add to the training set. If 'threshold', pseudo-labels with prediction probabilities above `threshold` are added to the dataset. If 'k_best', the `k_best` pseudo-labels with highest prediction probabilities are added to the dataset. When using the 'threshold' criterion, a :ref:`well calibrated classifier ` should be used." - }, - { - "name": "threshold", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The decision threshold for use with `criterion='threshold'`. Should be in [0, 1). When using the 'threshold' criterion, a :ref:`well calibrated classifier ` should be used." 
- }, - { - "name": "k_best", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "The amount of samples to add in each iteration. Only used when `criterion` is k_best'." - }, - { - "name": "max_iter", - "type": "Optional[int]", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Maximum number of iterations allowed. Should be greater than or equal to 0. If it is ``None``, the classifier will continue to predict labels until no new pseudo-labels are added, or all unlabeled samples have been labeled." - }, - { - "name": "verbose: bool", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output." - }, - { - "name": "default=False", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array representing the data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array representing the labels. Unlabeled samples should have the label -1." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fits this ``SelfTrainingClassifier`` to a dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\ny : {array-like, sparse matrix} of shape (n_samples,)\n Array representing the labels. Unlabeled samples should have the\n label -1.\n\nReturns\n-------\nself : object\n Returns an instance of self." 
- }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array representing the data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the classes of X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n Array with predicted labels." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array representing the data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict probability for each possible outcome.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\nReturns\n-------\ny : ndarray of shape (n_samples, n_features)\n Array with prediction probabilities." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array representing the data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calls decision function of the `base_estimator`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\nReturns\n-------\ny : ndarray of shape (n_samples, n_features)\n Result of the decision function of the `base_estimator`." 
- }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array representing the data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict log probability for each possible outcome.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\nReturns\n-------\ny : ndarray of shape (n_samples, n_features)\n Array with log prediction probabilities." - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array representing the data." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array representing the labels." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calls score on the `base_estimator`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\ny : array-like of shape (n_samples,)\n Array representing the labels.\n\nReturns\n-------\nscore : float\n Result of calling score on the `base_estimator`." - } - ], - "docstring": "Self-training classifier.\n\nThis class allows a given supervised classifier to function as a\nsemi-supervised classifier, allowing it to learn from unlabeled data. 
It\ndoes this by iteratively predicting pseudo-labels for the unlabeled data\nand adding them to the training set.\n\nThe classifier will continue iterating until either max_iter is reached, or\nno pseudo-labels were added to the training set in the previous iteration.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nbase_estimator : estimator object\n An estimator object implementing ``fit`` and ``predict_proba``.\n Invoking the ``fit`` method will fit a clone of the passed estimator,\n which will be stored in the ``base_estimator_`` attribute.\n\ncriterion : {'threshold', 'k_best'}, default='threshold'\n The selection criterion used to select which labels to add to the\n training set. If 'threshold', pseudo-labels with prediction\n probabilities above `threshold` are added to the dataset. If 'k_best',\n the `k_best` pseudo-labels with highest prediction probabilities are\n added to the dataset. When using the 'threshold' criterion, a\n :ref:`well calibrated classifier ` should be used.\n\nthreshold : float, default=0.75\n The decision threshold for use with `criterion='threshold'`.\n Should be in [0, 1). When using the 'threshold' criterion, a\n :ref:`well calibrated classifier ` should be used.\n\nk_best : int, default=10\n The amount of samples to add in each iteration. Only used when\n `criterion` is k_best'.\n\nmax_iter : int or None, default=10\n Maximum number of iterations allowed. Should be greater than or equal\n to 0. If it is ``None``, the classifier will continue to predict labels\n until no new pseudo-labels are added, or all unlabeled samples have\n been labeled.\n\nverbose: bool, default=False\n Enable verbose output.\n\nAttributes\n----------\nbase_estimator_ : estimator object\n The fitted estimator.\n\nclasses_ : ndarray or list of ndarray of shape (n_classes,)\n Class labels for each output. 
(Taken from the trained\n ``base_estimator_``).\n\ntransduction_ : ndarray of shape (n_samples,)\n The labels used for the final fit of the classifier, including\n pseudo-labels added during fit.\n\nlabeled_iter_ : ndarray of shape (n_samples,)\n The iteration in which each sample was labeled. When a sample has\n iteration 0, the sample was already labeled in the original dataset.\n When a sample has iteration -1, the sample was not labeled in any\n iteration.\n\nn_iter_ : int\n The number of rounds of self-training, that is the number of times the\n base estimator is fitted on relabeled variants of the training set.\n\ntermination_condition_ : {'max_iter', 'no_change', 'all_labeled'}\n The reason that fitting was stopped.\n\n - 'max_iter': `n_iter_` reached `max_iter`.\n - 'no_change': no new labels were predicted.\n - 'all_labeled': all unlabeled samples were labeled before `max_iter`\n was reached.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets\n>>> from sklearn.semi_supervised import SelfTrainingClassifier\n>>> from sklearn.svm import SVC\n>>> rng = np.random.RandomState(42)\n>>> iris = datasets.load_iris()\n>>> random_unlabeled_points = rng.rand(iris.target.shape[0]) < 0.3\n>>> iris.target[random_unlabeled_points] = -1\n>>> svc = SVC(probability=True, gamma=\"auto\")\n>>> self_training_model = SelfTrainingClassifier(svc)\n>>> self_training_model.fit(iris.data, iris.target)\nSelfTrainingClassifier(...)\n\nReferences\n----------\nDavid Yarowsky. 1995. Unsupervised word sense disambiguation rivaling\nsupervised methods. In Proceedings of the 33rd annual meeting on\nAssociation for Computational Linguistics (ACL '95). Association for\nComputational Linguistics, Stroudsburg, PA, USA, 189-196. 
DOI:\nhttps://doi.org/10.3115/981658.981684" - } - ], - "functions": [ - { - "name": "_validate_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure that an estimator implements the necessary methods." - } - ] - }, - { - "name": "sklearn.semi_supervised", - "imports": [ - "from _label_propagation import LabelPropagation", - "from _label_propagation import LabelSpreading", - "from _self_training import SelfTrainingClassifier" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.semi_supervised.tests.test_label_propagation", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy.sparse import issparse", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.semi_supervised import _label_propagation as label_propagation", - "from sklearn.metrics.pairwise import rbf_kernel", - "from sklearn.model_selection import train_test_split", - "from sklearn.neighbors import NearestNeighbors", - "from sklearn.datasets import make_classification", - "from sklearn.exceptions import ConvergenceWarning", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_fit_transduction", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_distribution", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_spreading_closed_form", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_propagation_closed_form", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_valid_alpha", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_convergence_speed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_convergence_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_label_propagation_non_zero_normalizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_sparse_callable_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.semi_supervised.tests.test_self_training", - "imports": [ - "from math import ceil", - "import numpy as np", - "from numpy.testing import assert_array_equal", - "import pytest", - "from sklearn.ensemble import StackingClassifier", - "from sklearn.exceptions import NotFittedError", - "from sklearn.neighbors import KNeighborsClassifier", - "from sklearn.svm import SVC", - "from sklearn.model_selection import train_test_split", - "from sklearn.datasets import load_iris", - "from sklearn.datasets import make_blobs", - "from sklearn.metrics import accuracy_score", - "from sklearn.semi_supervised import SelfTrainingClassifier" - ], - "classes": [], - "functions": [ - { - "name": "test_missing_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_none_classifier", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_params_selection_crit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warns_k_best", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_k_best", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sanity_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_none_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_zero_iterations", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prefitted_throws_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_labeled_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_unlabeled", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_early_stopping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_strings_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_verbose_k_best", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_k_best_selects_best", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_estimator_meta_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.semi_supervised.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.svm.setup", - "imports": [ - "import os", - "from os.path import join", - "import numpy", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.svm._base", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "import warnings", - "from abc import ABCMeta", - "from abc import abstractmethod", - "from None import _libsvm as libsvm", - "from None import _liblinear as liblinear", - "from None import _libsvm_sparse as libsvm_sparse", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from preprocessing import LabelEncoder", - "from utils.multiclass import _ovr_decision_function", - "from utils import check_array", - "from utils import check_random_state", - "from utils import column_or_1d", - "from utils import compute_class_weight", - "from utils.deprecation import deprecated", - "from utils.extmath import safe_sparse_dot", - "from utils.validation import check_is_fitted", - "from 
utils.validation import _check_large_sparse", - "from utils.validation import _num_samples", - "from utils.validation import _check_sample_weight", - "from utils.validation import check_consistent_length", - "from utils.multiclass import check_classification_targets", - "from exceptions import ConvergenceWarning", - "from exceptions import NotFittedError" - ], - "classes": [ - { - "name": "BaseLibSVM", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features. For kernel=\"precomputed\", the expected shape of X is (n_samples, n_samples)." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values (class labels in classification, real numbers in regression)." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Per-sample weights. Rescale C per sample. Higher weights force the classifier to put more emphasis on these points." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the SVM model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples)\n Training vectors, where n_samples is the number of samples\n and n_features is the number of features.\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples, n_samples).\n\ny : array-like of shape (n_samples,)\n Target values (class labels in classification, real numbers in\n regression).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Per-sample weights. Rescale C per sample. Higher weights\n force the classifier to put more emphasis on these points.\n\nReturns\n-------\nself : object\n\nNotes\n-----\nIf X and y are not C-ordered and contiguous arrays of np.float64 and\nX is not a scipy.sparse.csr_matrix, X and/or y may be copied.\n\nIf X is a dense array, then the other methods will not support sparse\nmatrices as input." - }, - { - "name": "_validate_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validation of y and class_weight.\n\nDefault implementation for SVR and one-class; overridden in BaseSVC." 
- }, - { - "name": "_warn_from_fit_status", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_dense_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sparse_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "For kernel=\"precomputed\", the expected shape of X is (n_samples_test, n_samples_train)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform regression on samples in X.\n\nFor an one-class model, +1 (inlier) or -1 (outlier) is returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)" - }, - { - "name": "_dense_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sparse_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_compute_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the data transformed by a callable kernel" - }, - { - "name": "_decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluates the decision function for the samples 
in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nX : array-like of shape (n_samples, n_class * (n_class-1) / 2)\n Returns the decision function of the sample for each class\n in the model." - }, - { - "name": "_dense_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sparse_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_for_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "n_support_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Base class for estimators that use libsvm as backing library.\n\nThis implements support vector machine classification and regression.\n\nParameter documentation is in the derived `SVC` class." 
- }, - { - "name": "BaseSVC", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Evaluates the decision function for the samples in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_classes * (n_classes-1) / 2)\n Returns the decision function of the sample for each class\n in the model.\n If decision_function_shape='ovr', the shape is (n_samples,\n n_classes).\n\nNotes\n-----\nIf decision_function_shape='ovo', the function values are proportional\nto the distance of the samples X to the separating hyperplane. If the\nexact distances are required, divide the function values by the norm of\nthe weight vector (``coef_``). See also `this question\n`_ for further details.\nIf decision_function_shape='ovr', the decision function is a monotonic\ntransformation of ovo decision function." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "For kernel=\"precomputed\", the expected shape of X is (n_samples_test, n_samples_train)." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on samples in X.\n\nFor an one-class model, +1 or -1 is returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples_test, n_samples_train)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n Class labels for samples in X." - }, - { - "name": "_check_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "For kernel=\"precomputed\", the expected shape of X is (n_samples_test, n_samples_train)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute probabilities of possible outcomes for samples in X.\n\nThe model need to have probability information computed at training\ntime: fit with attribute `probability` set to True.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\nReturns\n-------\nT : ndarray of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.\n\nNotes\n-----\nThe probability model is created using cross validation, so\nthe results can be slightly different than those obtained by\npredict. Also, it will produce meaningless results on very small\ndatasets." 
- }, - { - "name": "_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "For kernel=\"precomputed\", the expected shape of X is (n_samples_test, n_samples_train)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute log probabilities of possible outcomes for samples in X.\n\nThe model need to have probability information computed at training\ntime: fit with attribute `probability` set to True.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or (n_samples_test, n_samples_train)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\nReturns\n-------\nT : ndarray of shape (n_samples, n_classes)\n Returns the log-probabilities of the sample for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.\n\nNotes\n-----\nThe probability model is created using cross validation, so\nthe results can be slightly different than those obtained by\npredict. Also, it will produce meaningless results on very small\ndatasets." 
- }, - { - "name": "_predict_log_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_dense_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sparse_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "probA_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "probB_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "ABC for LibSVM-based classifiers." - } - ], - "functions": [ - { - "name": "_one_vs_one_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate primal coefficients from dual coefficients\nfor the one-vs-one multi class LibSVM in the case\nof a linear kernel." - }, - { - "name": "_get_liblinear_solver_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the liblinear magic number for the solver.\n\nThis number depends on the values of the following attributes:\n - multi_class\n - penalty\n - loss\n - dual\n\nThe same number is also internally used by LibLinear to determine\nwhich solver to use." - }, - { - "name": "_fit_liblinear", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector relative to X" - }, - { - "name": "C", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Inverse of cross-validation parameter. Lower the C, the more the penalization." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to fit the intercept, that is to add a intercept term to the decision function." - }, - { - "name": "intercept_scaling", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "LibLinear internally penalizes the intercept and this term is subject to regularization just like the other terms of the feature vector. In order to avoid this, one should increase the intercept_scaling. such that the feature vector becomes [x, intercept_scaling]." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``" - }, - { - "name": "penalty", - "type": "Literal['l1', 'l2']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The norm of the penalty used in regularization." 
- }, - { - "name": "dual", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dual or primal formulation," - }, - { - "name": "verbose", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Set verbose to any positive number for verbosity." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Stopping condition." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generation for shuffling the data. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "multi_class", - "type": "Literal['ovr', 'crammer_singer']", - "hasDefault": true, - "default": "'ovr'", - "limitation": null, - "ignored": false, - "docstring": "`ovr` trains n_classes one-vs-rest classifiers, while `crammer_singer` optimizes a joint objective over all classes. While `crammer_singer` is interesting from an theoretical perspective as it is consistent it is seldom used in practice and rarely leads to better accuracy and is more expensive to compute. If `crammer_singer` is chosen, the options loss, penalty and dual will be ignored." - }, - { - "name": "loss", - "type": "Literal['logistic_regression', 'hinge', 'squared_hinge', 'epsilon_insensitive']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The loss function used to fit the model." 
- }, - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Epsilon parameter in the epsilon-insensitive loss function. Note that the value of this parameter depends on the scale of the target variable y. If unsure, set epsilon=0." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights assigned to each sample." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Used by Logistic Regression (and CV) and LinearSVC/LinearSVR.\n\nPreprocessing is done in this function before supplying it to liblinear.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X\n\nC : float\n Inverse of cross-validation parameter. Lower the C, the more\n the penalization.\n\nfit_intercept : bool\n Whether or not to fit the intercept, that is to add a intercept\n term to the decision function.\n\nintercept_scaling : float\n LibLinear internally penalizes the intercept and this term is subject\n to regularization just like the other terms of the feature vector.\n In order to avoid this, one should increase the intercept_scaling.\n such that the feature vector becomes [x, intercept_scaling].\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one. 
For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\npenalty : {'l1', 'l2'}\n The norm of the penalty used in regularization.\n\ndual : bool\n Dual or primal formulation,\n\nverbose : int\n Set verbose to any positive number for verbosity.\n\nmax_iter : int\n Number of iterations.\n\ntol : float\n Stopping condition.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nmulti_class : {'ovr', 'crammer_singer'}, default='ovr'\n `ovr` trains n_classes one-vs-rest classifiers, while `crammer_singer`\n optimizes a joint objective over all classes.\n While `crammer_singer` is interesting from an theoretical perspective\n as it is consistent it is seldom used in practice and rarely leads to\n better accuracy and is more expensive to compute.\n If `crammer_singer` is chosen, the options loss, penalty and dual will\n be ignored.\n\nloss : {'logistic_regression', 'hinge', 'squared_hinge', 'epsilon_insensitive', 'squared_epsilon_insensitive}, default='logistic_regression'\n The loss function used to fit the model.\n\nepsilon : float, default=0.1\n Epsilon parameter in the epsilon-insensitive loss function. Note\n that the value of this parameter depends on the scale of the target\n variable y. 
If unsure, set epsilon=0.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights assigned to each sample.\n\nReturns\n-------\ncoef_ : ndarray of shape (n_features, n_features + 1)\n The coefficient vector got by minimizing the objective function.\n\nintercept_ : float\n The intercept term added to the vector.\n\nn_iter_ : int\n Maximum number of iterations run across all classes." - } - ] - }, - { - "name": "sklearn.svm._bounds", - "imports": [ - "import numpy as np", - "from preprocessing import LabelBinarizer", - "from utils.validation import check_consistent_length", - "from utils.validation import check_array", - "from utils.validation import _deprecate_positional_args", - "from utils.extmath import safe_sparse_dot" - ], - "classes": [], - "functions": [ - { - "name": "l1_min_c", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector relative to X." - }, - { - "name": "loss", - "type": "Literal['squared_hinge', 'log']", - "hasDefault": true, - "default": "'squared_hinge'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the loss function. With 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss). With 'log' it is the loss of logistic regression models." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Specifies if the intercept should be fitted by the model. It must match the fit() method parameter." 
- }, - { - "name": "intercept_scaling", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "when fit_intercept is True, instance vector x becomes [x, intercept_scaling], i.e. a \"synthetic\" feature with constant value equals to intercept_scaling is appended to the instance vector. It must match the fit() method parameter." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the lowest bound for C such that for C in (l1_min_C, infinity)\nthe model is guaranteed not to be empty. This applies to l1 penalized\nclassifiers, such as LinearSVC with penalty='l1' and\nlinear_model.LogisticRegression with penalty='l1'.\n\nThis value is valid if class_weight parameter in fit() is not set.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X.\n\nloss : {'squared_hinge', 'log'}, default='squared_hinge'\n Specifies the loss function.\n With 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss).\n With 'log' it is the loss of logistic regression models.\n\nfit_intercept : bool, default=True\n Specifies if the intercept should be fitted by the model.\n It must match the fit() method parameter.\n\nintercept_scaling : float, default=1.0\n when fit_intercept is True, instance vector x becomes\n [x, intercept_scaling],\n i.e. 
a \"synthetic\" feature with constant value equals to\n intercept_scaling is appended to the instance vector.\n It must match the fit() method parameter.\n\nReturns\n-------\nl1_min_c : float\n minimum value for C" - } - ] - }, - { - "name": "sklearn.svm._classes", - "imports": [ - "import numpy as np", - "from _base import _fit_liblinear", - "from _base import BaseSVC", - "from _base import BaseLibSVM", - "from base import BaseEstimator", - "from base import RegressorMixin", - "from base import OutlierMixin", - "from linear_model._base import LinearClassifierMixin", - "from linear_model._base import SparseCoefMixin", - "from linear_model._base import LinearModel", - "from utils.validation import _num_samples", - "from utils.validation import _deprecate_positional_args", - "from utils.multiclass import check_classification_targets", - "from utils.deprecation import deprecated" - ], - "classes": [ - { - "name": "LinearSVC", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "penalty", - "type": "Literal['l1', 'l2']", - "hasDefault": true, - "default": "'l2'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the norm used in the penalization. The 'l2' penalty is the standard used in SVC. The 'l1' leads to ``coef_`` vectors that are sparse." - }, - { - "name": "loss", - "type": "Literal['hinge', 'squared_hinge']", - "hasDefault": true, - "default": "'squared_hinge'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the loss function. 'hinge' is the standard SVM loss (used e.g. by the SVC class) while 'squared_hinge' is the square of the hinge loss. The combination of ``penalty='l1'`` and ``loss='hinge'`` is not supported." - }, - { - "name": "dual", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Select the algorithm to either solve the dual or primal optimization problem. 
Prefer dual=False when n_samples > n_features." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criteria." - }, - { - "name": "C", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive." - }, - { - "name": "multi_class", - "type": "Literal['ovr', 'crammer_singer']", - "hasDefault": true, - "default": "'ovr'", - "limitation": null, - "ignored": false, - "docstring": "Determines the multi-class strategy if `y` contains more than two classes. ``\"ovr\"`` trains n_classes one-vs-rest classifiers, while ``\"crammer_singer\"`` optimizes a joint objective over all classes. While `crammer_singer` is interesting from a theoretical perspective as it is consistent, it is seldom used in practice as it rarely leads to better accuracy and is more expensive to compute. If ``\"crammer_singer\"`` is chosen, the options loss, penalty and dual will be ignored." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be already centered)." - }, - { - "name": "intercept_scaling", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "When self.fit_intercept is True, instance vector x becomes ``[x, self.intercept_scaling]``, i.e. a \"synthetic\" feature with constant value equals to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic feature weight Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. 
To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Set the parameter C of class i to ``class_weight[i]*C`` for SVC. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in liblinear that, if enabled, may not work properly in a multithreaded context." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generation for shuffling the data for the dual coordinate descent (if ``dual=True``). When ``dual=False`` the underlying implementation of :class:`LinearSVC` is not random and ``random_state`` has no effect on the results. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations to be run." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector relative to X." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight. .. versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Array of weights that are assigned to individual\n samples. If not provided,\n then each sample is given unit weight.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nself : object\n An instance of the estimator." 
- }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Linear Support Vector Classification.\n\nSimilar to SVC with parameter kernel='linear', but implemented in terms of\nliblinear rather than libsvm, so it has more flexibility in the choice of\npenalties and loss functions and should scale better to large numbers of\nsamples.\n\nThis class supports both dense and sparse input and the multiclass support\nis handled according to a one-vs-the-rest scheme.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npenalty : {'l1', 'l2'}, default='l2'\n Specifies the norm used in the penalization. The 'l2'\n penalty is the standard used in SVC. The 'l1' leads to ``coef_``\n vectors that are sparse.\n\nloss : {'hinge', 'squared_hinge'}, default='squared_hinge'\n Specifies the loss function. 'hinge' is the standard SVM loss\n (used e.g. by the SVC class) while 'squared_hinge' is the\n square of the hinge loss. The combination of ``penalty='l1'``\n and ``loss='hinge'`` is not supported.\n\ndual : bool, default=True\n Select the algorithm to either solve the dual or primal\n optimization problem. Prefer dual=False when n_samples > n_features.\n\ntol : float, default=1e-4\n Tolerance for stopping criteria.\n\nC : float, default=1.0\n Regularization parameter. The strength of the regularization is\n inversely proportional to C. 
Must be strictly positive.\n\nmulti_class : {'ovr', 'crammer_singer'}, default='ovr'\n Determines the multi-class strategy if `y` contains more than\n two classes.\n ``\"ovr\"`` trains n_classes one-vs-rest classifiers, while\n ``\"crammer_singer\"`` optimizes a joint objective over all classes.\n While `crammer_singer` is interesting from a theoretical perspective\n as it is consistent, it is seldom used in practice as it rarely leads\n to better accuracy and is more expensive to compute.\n If ``\"crammer_singer\"`` is chosen, the options loss, penalty and dual\n will be ignored.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be already centered).\n\nintercept_scaling : float, default=1\n When self.fit_intercept is True, instance vector x becomes\n ``[x, self.intercept_scaling]``,\n i.e. a \"synthetic\" feature with constant value equals to\n intercept_scaling is appended to the instance vector.\n The intercept becomes intercept_scaling * synthetic feature weight\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\nclass_weight : dict or 'balanced', default=None\n Set the parameter C of class i to ``class_weight[i]*C`` for\n SVC. If not given, all classes are supposed to have\n weight one.\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\nverbose : int, default=0\n Enable verbose output. 
Note that this setting takes advantage of a\n per-process runtime setting in liblinear that, if enabled, may not work\n properly in a multithreaded context.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data for\n the dual coordinate descent (if ``dual=True``). When ``dual=False`` the\n underlying implementation of :class:`LinearSVC` is not random and\n ``random_state`` has no effect on the results.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nmax_iter : int, default=1000\n The maximum number of iterations to be run.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features) if n_classes == 2 else (n_classes, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n ``coef_`` is a readonly property derived from ``raw_coef_`` that\n follows the internal memory layout of liblinear.\n\nintercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n Constants in decision function.\n\nclasses_ : ndarray of shape (n_classes,)\n The unique classes labels.\n\nn_iter_ : int\n Maximum number of iterations run across all classes.\n\nSee Also\n--------\nSVC : Implementation of Support Vector Machine classifier using libsvm:\n the kernel can be non-linear but its SMO algorithm does not\n scale to large number of samples as LinearSVC does.\n\n Furthermore SVC multi-class mode is implemented using one\n vs one scheme while LinearSVC uses one vs the rest. It is\n possible to implement one vs the rest with SVC by using the\n :class:`~sklearn.multiclass.OneVsRestClassifier` wrapper.\n\n Finally SVC can fit dense data without memory copy if the input\n is C-contiguous. 
Sparse data will still incur memory copy though.\n\nsklearn.linear_model.SGDClassifier : SGDClassifier can optimize the same\n cost function as LinearSVC\n by adjusting the penalty and loss parameters. In addition it requires\n less memory, allows incremental (online) learning, and implements\n various loss functions and regularization regimes.\n\nNotes\n-----\nThe underlying C implementation uses a random number generator to\nselect features when fitting the model. It is thus not uncommon\nto have slightly different results for the same input data. If\nthat happens, try with a smaller ``tol`` parameter.\n\nThe underlying implementation, liblinear, uses a sparse internal\nrepresentation for the data that will incur a memory copy.\n\nPredict output may not match that of standalone liblinear in certain\ncases. See :ref:`differences from liblinear `\nin the narrative documentation.\n\nReferences\n----------\n`LIBLINEAR: A Library for Large Linear Classification\n`__\n\nExamples\n--------\n>>> from sklearn.svm import LinearSVC\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_features=4, random_state=0)\n>>> clf = make_pipeline(StandardScaler(),\n... LinearSVC(random_state=0, tol=1e-5))\n>>> clf.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('linearsvc', LinearSVC(random_state=0, tol=1e-05))])\n\n>>> print(clf.named_steps['linearsvc'].coef_)\n[[0.141... 0.526... 0.679... 
0.493...]]\n\n>>> print(clf.named_steps['linearsvc'].intercept_)\n[0.1693...]\n>>> print(clf.predict([[0, 0, 0, 0]]))\n[1]" - }, - { - "name": "LinearSVR", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Epsilon parameter in the epsilon-insensitive loss function. Note that the value of this parameter depends on the scale of the target variable y. If unsure, set ``epsilon=0``." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criteria." - }, - { - "name": "C", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive." - }, - { - "name": "loss", - "type": "Literal['epsilon_insensitive', 'squared_epsilon_insensitive']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Specifies the loss function. The epsilon-insensitive loss (standard SVR) is the L1 loss, while the squared epsilon-insensitive loss ('squared_epsilon_insensitive') is the L2 loss." - }, - { - "name": "fit_intercept", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be already centered)." - }, - { - "name": "intercept_scaling", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "When self.fit_intercept is True, instance vector x becomes [x, self.intercept_scaling], i.e. 
a \"synthetic\" feature with constant value equals to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic feature weight Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased." - }, - { - "name": "dual", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Select the algorithm to either solve the dual or primal optimization problem. Prefer dual=False when n_samples > n_features." - }, - { - "name": "verbose", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in liblinear that, if enabled, may not work properly in a multithreaded context." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generation for shuffling the data. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "1000", - "limitation": null, - "ignored": false, - "docstring": "The maximum number of iterations to be run." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples in the number of samples and n_features is the number of features." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target vector relative to X" - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight. .. versionadded:: 0.18" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X\n\nsample_weight : array-like of shape (n_samples,), default=None\n Array of weights that are assigned to individual\n samples. If not provided,\n then each sample is given unit weight.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nself : object\n An instance of the estimator." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Linear Support Vector Regression.\n\nSimilar to SVR with parameter kernel='linear', but implemented in terms of\nliblinear rather than libsvm, so it has more flexibility in the choice of\npenalties and loss functions and should scale better to large numbers of\nsamples.\n\nThis class supports both dense and sparse input.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16\n\nParameters\n----------\nepsilon : float, default=0.0\n Epsilon parameter in the epsilon-insensitive loss function. Note\n that the value of this parameter depends on the scale of the target\n variable y. 
If unsure, set ``epsilon=0``.\n\ntol : float, default=1e-4\n Tolerance for stopping criteria.\n\nC : float, default=1.0\n Regularization parameter. The strength of the regularization is\n inversely proportional to C. Must be strictly positive.\n\nloss : {'epsilon_insensitive', 'squared_epsilon_insensitive'}, default='epsilon_insensitive'\n Specifies the loss function. The epsilon-insensitive loss\n (standard SVR) is the L1 loss, while the squared epsilon-insensitive\n loss ('squared_epsilon_insensitive') is the L2 loss.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be already centered).\n\nintercept_scaling : float, default=1.\n When self.fit_intercept is True, instance vector x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equals to\n intercept_scaling is appended to the instance vector.\n The intercept becomes intercept_scaling * synthetic feature weight\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\ndual : bool, default=True\n Select the algorithm to either solve the dual or primal\n optimization problem. Prefer dual=False when n_samples > n_features.\n\nverbose : int, default=0\n Enable verbose output. 
Note that this setting takes advantage of a\n per-process runtime setting in liblinear that, if enabled, may not work\n properly in a multithreaded context.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nmax_iter : int, default=1000\n The maximum number of iterations to be run.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features) if n_classes == 2 else (n_classes, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is a readonly property derived from `raw_coef_` that\n follows the internal memory layout of liblinear.\n\nintercept_ : ndarray of shape (1) if n_classes == 2 else (n_classes)\n Constants in decision function.\n\nn_iter_ : int\n Maximum number of iterations run across all classes.\n\nExamples\n--------\n>>> from sklearn.svm import LinearSVR\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=4, random_state=0)\n>>> regr = make_pipeline(StandardScaler(),\n... LinearSVR(random_state=0, tol=1e-5))\n>>> regr.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('linearsvr', LinearSVR(random_state=0, tol=1e-05))])\n\n>>> print(regr.named_steps['linearsvr'].coef_)\n[18.582... 27.023... 44.357... 
64.522...]\n>>> print(regr.named_steps['linearsvr'].intercept_)\n[-4...]\n>>> print(regr.predict([[0, 0, 0, 0]]))\n[-2.384...]\n\n\nSee Also\n--------\nLinearSVC : Implementation of Support Vector Machine classifier using the\n same library as this class (liblinear).\n\nSVR : Implementation of Support Vector Machine regression using libsvm:\n the kernel can be non-linear but its SMO algorithm does not\n scale to large number of samples as LinearSVC does.\n\nsklearn.linear_model.SGDRegressor : SGDRegressor can optimize the same cost\n function as LinearSVR\n by adjusting the penalty and loss parameters. In addition it requires\n less memory, allows incremental (online) learning, and implements\n various loss functions and regularization regimes." - }, - { - "name": "SVC", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "C", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty." - }, - { - "name": "kernel", - "type": "Literal['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the kernel type to be used in the algorithm. It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a callable. If none is given, 'rbf' will be used. If a callable is given it is used to pre-compute the kernel matrix from data matrices; that matrix should be an array of shape ``(n_samples, n_samples)``." - }, - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel function ('poly'). Ignored by all other kernels." 
- }, - { - "name": "gamma", - "type": "Literal['scale', 'auto']", - "hasDefault": true, - "default": "'scale'", - "limitation": null, - "ignored": false, - "docstring": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - if ``gamma='scale'`` (default) is passed then it uses 1 / (n_features * X.var()) as value of gamma, - if 'auto', uses 1 / n_features. .. versionchanged:: 0.22 The default value of ``gamma`` changed from 'auto' to 'scale'." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'." - }, - { - "name": "shrinking", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use the shrinking heuristic. See the :ref:`User Guide `." - }, - { - "name": "probability", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enable probability estimates. This must be enabled prior to calling `fit`, will slow down that method as it internally uses 5-fold cross-validation, and `predict_proba` may be inconsistent with `predict`. Read more in the :ref:`User Guide `." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criterion." - }, - { - "name": "cache_size", - "type": "float", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Specify the size of the kernel cache (in MB)." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Set the parameter C of class i to class_weight[i]*C for SVC. If not given, all classes are supposed to have weight one. 
The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``" - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in libsvm that, if enabled, may not work properly in a multithreaded context." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "-1", - "limitation": null, - "ignored": false, - "docstring": "Hard limit on iterations within solver, or -1 for no limit." - }, - { - "name": "decision_function_shape", - "type": "Literal['ovo', 'ovr']", - "hasDefault": true, - "default": "'ovr'", - "limitation": null, - "ignored": false, - "docstring": "Whether to return a one-vs-rest ('ovr') decision function of shape (n_samples, n_classes) as all other classifiers, or the original one-vs-one ('ovo') decision function of libsvm which has shape (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one ('ovo') is always used as multi-class strategy. The parameter is ignored for binary classification. .. versionchanged:: 0.19 decision_function_shape is 'ovr' by default. .. versionadded:: 0.17 *decision_function_shape='ovr'* is recommended. .. versionchanged:: 0.17 Deprecated *decision_function_shape='ovo' and None*." - }, - { - "name": "break_ties", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, ``decision_function_shape='ovr'``, and number of classes > 2, :term:`predict` will break ties according to the confidence values of :term:`decision_function`; otherwise the first class among the tied classes is returned. Please note that breaking ties comes at a relatively high computational cost compared to a simple predict. .. 
versionadded:: 0.22" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generation for shuffling the data for probability estimates. Ignored when `probability` is False. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "C-Support Vector Classification.\n\nThe implementation is based on libsvm. The fit time scales at least\nquadratically with the number of samples and may be impractical\nbeyond tens of thousands of samples. For large datasets\nconsider using :class:`~sklearn.svm.LinearSVC` or\n:class:`~sklearn.linear_model.SGDClassifier` instead, possibly after a\n:class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\nThe multiclass support is handled according to a one-vs-one scheme.\n\nFor details on the precise mathematical formulation of the provided\nkernel functions and how `gamma`, `coef0` and `degree` affect each\nother, see the corresponding section in the narrative documentation:\n:ref:`svm_kernels`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nC : float, default=1.0\n Regularization parameter. The strength of the regularization is\n inversely proportional to C. Must be strictly positive. The penalty\n is a squared l2 penalty.\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. 
If a callable is given it is\n used to pre-compute the kernel matrix from data matrices; that matrix\n should be an array of shape ``(n_samples, n_samples)``.\n\ndegree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\nshrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\nprobability : bool, default=False\n Whether to enable probability estimates. This must be enabled prior\n to calling `fit`, will slow down that method as it internally uses\n 5-fold cross-validation, and `predict_proba` may be inconsistent with\n `predict`. Read more in the :ref:`User Guide `.\n\ntol : float, default=1e-3\n Tolerance for stopping criterion.\n\ncache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\nclass_weight : dict or 'balanced', default=None\n Set the parameter C of class i to class_weight[i]*C for\n SVC. If not given, all classes are supposed to have\n weight one.\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\nverbose : bool, default=False\n Enable verbose output. 
Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\nmax_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\ndecision_function_shape : {'ovo', 'ovr'}, default='ovr'\n Whether to return a one-vs-rest ('ovr') decision function of shape\n (n_samples, n_classes) as all other classifiers, or the original\n one-vs-one ('ovo') decision function of libsvm which has shape\n (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one\n ('ovo') is always used as multi-class strategy. The parameter is\n ignored for binary classification.\n\n .. versionchanged:: 0.19\n decision_function_shape is 'ovr' by default.\n\n .. versionadded:: 0.17\n *decision_function_shape='ovr'* is recommended.\n\n .. versionchanged:: 0.17\n Deprecated *decision_function_shape='ovo' and None*.\n\nbreak_ties : bool, default=False\n If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n :term:`predict` will break ties according to the confidence values of\n :term:`decision_function`; otherwise the first class among the tied\n classes is returned. Please note that breaking ties comes at a\n relatively high computational cost compared to a simple predict.\n\n .. versionadded:: 0.22\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data for\n probability estimates. 
Ignored when `probability` is False.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C for each class.\n Computed based on the ``class_weight`` parameter.\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\ncoef_ : ndarray of shape (n_classes * (n_classes - 1) / 2, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is a readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\ndual_coef_ : ndarray of shape (n_classes -1, n_SV)\n Dual coefficients of the support vector in the decision\n function (see :ref:`sgd_mathematical_formulation`), multiplied by\n their targets.\n For multiclass, coefficient for all 1-vs-1 classifiers.\n The layout of the coefficients in the multiclass case is somewhat\n non-trivial. See the :ref:`multi-class section of the User Guide\n ` for details.\n\nfit_status_ : int\n 0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n Constants in decision function.\n\nsupport_ : ndarray of shape (n_SV)\n Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\nprobA_ : ndarray of shape (n_classes * (n_classes - 1) / 2)\nprobB_ : ndarray of shape (n_classes * (n_classes - 1) / 2)\n If `probability=True`, it corresponds to the parameters learned in\n Platt scaling to produce probability estimates from decision values.\n If `probability=False`, it's an empty array. Platt scaling uses the\n logistic function\n ``1 / (1 + exp(decision_value * probA_ + probB_))``\n where ``probA_`` and ``probB_`` are learned from the dataset [2]_. 
For\n more information on the multiclass case and training procedure see\n section 8 of [1]_.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> y = np.array([1, 1, 2, 2])\n>>> from sklearn.svm import SVC\n>>> clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))\n>>> clf.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('svc', SVC(gamma='auto'))])\n\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n\nSee Also\n--------\nSVR : Support Vector Machine for Regression implemented using libsvm.\n\nLinearSVC : Scalable Linear Support Vector Machine for classification\n implemented using liblinear. Check the See Also section of\n LinearSVC for more comparison element.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n `_\n\n.. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n machines and comparison to regularizedlikelihood methods.\"\n `_" - }, - { - "name": "NuSVC", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "nu", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "An upper bound on the fraction of margin errors (see :ref:`User Guide `) and a lower bound of the fraction of support vectors. Should be in the interval (0, 1]." - }, - { - "name": "kernel", - "type": "Literal['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the kernel type to be used in the algorithm. It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a callable. 
If none is given, 'rbf' will be used. If a callable is given it is used to precompute the kernel matrix." - }, - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel function ('poly'). Ignored by all other kernels." - }, - { - "name": "gamma", - "type": "Literal['scale', 'auto']", - "hasDefault": true, - "default": "'scale'", - "limitation": null, - "ignored": false, - "docstring": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - if ``gamma='scale'`` (default) is passed then it uses 1 / (n_features * X.var()) as value of gamma, - if 'auto', uses 1 / n_features. .. versionchanged:: 0.22 The default value of ``gamma`` changed from 'auto' to 'scale'." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'." - }, - { - "name": "shrinking", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use the shrinking heuristic. See the :ref:`User Guide `." - }, - { - "name": "probability", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to enable probability estimates. This must be enabled prior to calling `fit`, will slow down that method as it internally uses 5-fold cross-validation, and `predict_proba` may be inconsistent with `predict`. Read more in the :ref:`User Guide `." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criterion." 
- }, - { - "name": "cache_size", - "type": "float", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Specify the size of the kernel cache (in MB)." - }, - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Set the parameter C of class i to class_weight[i]*C for SVC. If not given, all classes are supposed to have weight one. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies as ``n_samples / (n_classes * np.bincount(y))``" - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in libsvm that, if enabled, may not work properly in a multithreaded context." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "-1", - "limitation": null, - "ignored": false, - "docstring": "Hard limit on iterations within solver, or -1 for no limit." - }, - { - "name": "decision_function_shape", - "type": "Literal['ovo', 'ovr']", - "hasDefault": true, - "default": "'ovr'", - "limitation": null, - "ignored": false, - "docstring": "Whether to return a one-vs-rest ('ovr') decision function of shape (n_samples, n_classes) as all other classifiers, or the original one-vs-one ('ovo') decision function of libsvm which has shape (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one ('ovo') is always used as multi-class strategy. The parameter is ignored for binary classification. .. versionchanged:: 0.19 decision_function_shape is 'ovr' by default. .. versionadded:: 0.17 *decision_function_shape='ovr'* is recommended. .. versionchanged:: 0.17 Deprecated *decision_function_shape='ovo' and None*." 
- }, - { - "name": "break_ties", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true, ``decision_function_shape='ovr'``, and number of classes > 2, :term:`predict` will break ties according to the confidence values of :term:`decision_function`; otherwise the first class among the tied classes is returned. Please note that breaking ties comes at a relatively high computational cost compared to a simple predict. .. versionadded:: 0.22" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the pseudo random number generation for shuffling the data for probability estimates. Ignored when `probability` is False. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Nu-Support Vector Classification.\n\nSimilar to SVC but uses a parameter to control the number of support\nvectors.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nnu : float, default=0.5\n An upper bound on the fraction of margin errors (see :ref:`User Guide\n `) and a lower bound of the fraction of support vectors.\n Should be in the interval (0, 1].\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. 
If a callable is given it is\n used to precompute the kernel matrix.\n\ndegree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\nshrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\nprobability : bool, default=False\n Whether to enable probability estimates. This must be enabled prior\n to calling `fit`, will slow down that method as it internally uses\n 5-fold cross-validation, and `predict_proba` may be inconsistent with\n `predict`. Read more in the :ref:`User Guide `.\n\ntol : float, default=1e-3\n Tolerance for stopping criterion.\n\ncache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\nclass_weight : {dict, 'balanced'}, default=None\n Set the parameter C of class i to class_weight[i]*C for\n SVC. If not given, all classes are supposed to have\n weight one. The \"balanced\" mode uses the values of y to automatically\n adjust weights inversely proportional to class frequencies as\n ``n_samples / (n_classes * np.bincount(y))``\n\nverbose : bool, default=False\n Enable verbose output. 
Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\nmax_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\ndecision_function_shape : {'ovo', 'ovr'}, default='ovr'\n Whether to return a one-vs-rest ('ovr') decision function of shape\n (n_samples, n_classes) as all other classifiers, or the original\n one-vs-one ('ovo') decision function of libsvm which has shape\n (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one\n ('ovo') is always used as multi-class strategy. The parameter is\n ignored for binary classification.\n\n .. versionchanged:: 0.19\n decision_function_shape is 'ovr' by default.\n\n .. versionadded:: 0.17\n *decision_function_shape='ovr'* is recommended.\n\n .. versionchanged:: 0.17\n Deprecated *decision_function_shape='ovo' and None*.\n\nbreak_ties : bool, default=False\n If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n :term:`predict` will break ties according to the confidence values of\n :term:`decision_function`; otherwise the first class among the tied\n classes is returned. Please note that breaking ties comes at a\n relatively high computational cost compared to a simple predict.\n\n .. versionadded:: 0.22\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data for\n probability estimates. 
Ignored when `probability` is False.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C of each class.\n Computed based on the ``class_weight`` parameter.\n\nclasses_ : ndarray of shape (n_classes,)\n The unique classes labels.\n\ncoef_ : ndarray of shape (n_classes * (n_classes -1) / 2, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\ndual_coef_ : ndarray of shape (n_classes - 1, n_SV)\n Dual coefficients of the support vector in the decision\n function (see :ref:`sgd_mathematical_formulation`), multiplied by\n their targets.\n For multiclass, coefficient for all 1-vs-1 classifiers.\n The layout of the coefficients in the multiclass case is somewhat\n non-trivial. See the :ref:`multi-class section of the User Guide\n ` for details.\n\nfit_status_ : int\n 0 if correctly fitted, 1 if the algorithm did not converge.\n\nintercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n Constants in decision function.\n\nsupport_ : ndarray of shape (n_SV,)\n Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\nfit_status_ : int\n 0 if correctly fitted, 1 if the algorithm did not converge.\n\nprobA_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\nprobB_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n If `probability=True`, it corresponds to the parameters learned in\n Platt scaling to produce probability estimates from decision values.\n If `probability=False`, it's an empty array. 
Platt scaling uses the\n logistic function\n ``1 / (1 + exp(decision_value * probA_ + probB_))``\n where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For\n more information on the multiclass case and training procedure see\n section 8 of [1]_.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> y = np.array([1, 1, 2, 2])\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.svm import NuSVC\n>>> clf = make_pipeline(StandardScaler(), NuSVC())\n>>> clf.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()), ('nusvc', NuSVC())])\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n\nSee Also\n--------\nSVC : Support Vector Machine for classification using libsvm.\n\nLinearSVC : Scalable linear Support Vector Machine for classification using\n liblinear.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n `_\n\n.. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n machines and comparison to regularizedlikelihood methods.\"\n `_" - }, - { - "name": "SVR", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": "Literal['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the kernel type to be used in the algorithm. It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a callable. If none is given, 'rbf' will be used. If a callable is given it is used to precompute the kernel matrix." 
- }, - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel function ('poly'). Ignored by all other kernels." - }, - { - "name": "gamma", - "type": "Literal['scale', 'auto']", - "hasDefault": true, - "default": "'scale'", - "limitation": null, - "ignored": false, - "docstring": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - if ``gamma='scale'`` (default) is passed then it uses 1 / (n_features * X.var()) as value of gamma, - if 'auto', uses 1 / n_features. .. versionchanged:: 0.22 The default value of ``gamma`` changed from 'auto' to 'scale'." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criterion." - }, - { - "name": "C", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty." - }, - { - "name": "epsilon", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Epsilon in the epsilon-SVR model. It specifies the epsilon-tube within which no penalty is associated in the training loss function with points predicted within a distance epsilon from the actual value." - }, - { - "name": "shrinking", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use the shrinking heuristic. See the :ref:`User Guide `." 
- }, - { - "name": "cache_size", - "type": "float", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Specify the size of the kernel cache (in MB)." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in libsvm that, if enabled, may not work properly in a multithreaded context." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "-1", - "limitation": null, - "ignored": false, - "docstring": "Hard limit on iterations within solver, or -1 for no limit." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "probA_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "probB_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Epsilon-Support Vector Regression.\n\nThe free parameters in the model are C and epsilon.\n\nThe implementation is based on libsvm. The fit time complexity\nis more than quadratic with the number of samples which makes it hard\nto scale to datasets with more than a couple of 10000 samples. 
For large\ndatasets consider using :class:`~sklearn.svm.LinearSVR` or\n:class:`~sklearn.linear_model.SGDRegressor` instead, possibly after a\n:class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. If a callable is given it is\n used to precompute the kernel matrix.\n\ndegree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\ntol : float, default=1e-3\n Tolerance for stopping criterion.\n\nC : float, default=1.0\n Regularization parameter. The strength of the regularization is\n inversely proportional to C. Must be strictly positive.\n The penalty is a squared l2 penalty.\n\nepsilon : float, default=0.1\n Epsilon in the epsilon-SVR model. It specifies the epsilon-tube\n within which no penalty is associated in the training loss function\n with points predicted within a distance epsilon from the actual\n value.\n\nshrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\ncache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\nverbose : bool, default=False\n Enable verbose output. 
Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\nmax_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C for each class.\n Computed based on the ``class_weight`` parameter.\n\ncoef_ : ndarray of shape (1, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\ndual_coef_ : ndarray of shape (1, n_SV)\n Coefficients of the support vector in the decision function.\n\nfit_status_ : int\n 0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (1,)\n Constants in decision function.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\nsupport_ : ndarray of shape (n_SV,)\n Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\nExamples\n--------\n>>> from sklearn.svm import SVR\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> regr = make_pipeline(StandardScaler(), SVR(C=1.0, epsilon=0.2))\n>>> regr.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('svr', SVR(epsilon=0.2))])\n\nSee Also\n--------\nNuSVR : Support Vector Machine for regression implemented using libsvm\n using a parameter to control the number of support vectors.\n\nLinearSVR : Scalable Linear Support Vector Machine for 
regression\n implemented using liblinear.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n `_\n\n.. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n machines and comparison to regularizedlikelihood methods.\"\n `_" - }, - { - "name": "NuSVR", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "nu", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "An upper bound on the fraction of training errors and a lower bound of the fraction of support vectors. Should be in the interval (0, 1]. By default 0.5 will be taken." - }, - { - "name": "C", - "type": "float", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Penalty parameter C of the error term." - }, - { - "name": "kernel", - "type": "Literal['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the kernel type to be used in the algorithm. It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a callable. If none is given, 'rbf' will be used. If a callable is given it is used to precompute the kernel matrix." - }, - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel function ('poly'). Ignored by all other kernels." - }, - { - "name": "gamma", - "type": "Literal['scale', 'auto']", - "hasDefault": true, - "default": "'scale'", - "limitation": null, - "ignored": false, - "docstring": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - if ``gamma='scale'`` (default) is passed then it uses 1 / (n_features * X.var()) as value of gamma, - if 'auto', uses 1 / n_features. .. 
versionchanged:: 0.22 The default value of ``gamma`` changed from 'auto' to 'scale'." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'." - }, - { - "name": "shrinking", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use the shrinking heuristic. See the :ref:`User Guide `." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criterion." - }, - { - "name": "cache_size", - "type": "float", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Specify the size of the kernel cache (in MB)." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in libsvm that, if enabled, may not work properly in a multithreaded context." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "-1", - "limitation": null, - "ignored": false, - "docstring": "Hard limit on iterations within solver, or -1 for no limit." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Nu Support Vector Regression.\n\nSimilar to NuSVC, for regression, uses a parameter nu to control\nthe number of support vectors. 
However, unlike NuSVC, where nu\nreplaces C, here nu replaces the parameter epsilon of epsilon-SVR.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nnu : float, default=0.5\n An upper bound on the fraction of training errors and a lower bound of\n the fraction of support vectors. Should be in the interval (0, 1]. By\n default 0.5 will be taken.\n\nC : float, default=1.0\n Penalty parameter C of the error term.\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. If a callable is given it is\n used to precompute the kernel matrix.\n\ndegree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\nshrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\ntol : float, default=1e-3\n Tolerance for stopping criterion.\n\ncache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\nverbose : bool, default=False\n Enable verbose output. 
Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\nmax_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C for each class.\n Computed based on the ``class_weight`` parameter.\n\ncoef_ : ndarray of shape (1, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\ndual_coef_ : ndarray of shape (1, n_SV)\n Coefficients of the support vector in the decision function.\n\nfit_status_ : int\n 0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (1,)\n Constants in decision function.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\nsupport_ : ndarray of shape (n_SV,)\n Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\nExamples\n--------\n>>> from sklearn.svm import NuSVR\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> np.random.seed(0)\n>>> y = np.random.randn(n_samples)\n>>> X = np.random.randn(n_samples, n_features)\n>>> regr = make_pipeline(StandardScaler(), NuSVR(C=1.0, nu=0.1))\n>>> regr.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('nusvr', NuSVR(nu=0.1))])\n\nSee Also\n--------\nNuSVC : Support Vector Machine for classification implemented with libsvm\n with a parameter to control the number of support vectors.\n\nSVR : Epsilon Support Vector Machine for regression 
implemented with\n libsvm.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n `_\n\n.. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n machines and comparison to regularizedlikelihood methods.\"\n `_" - }, - { - "name": "OneClassSVM", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kernel", - "type": "Literal['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']", - "hasDefault": true, - "default": "'rbf'", - "limitation": null, - "ignored": false, - "docstring": "Specifies the kernel type to be used in the algorithm. It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a callable. If none is given, 'rbf' will be used. If a callable is given it is used to precompute the kernel matrix." - }, - { - "name": "degree", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Degree of the polynomial kernel function ('poly'). Ignored by all other kernels." - }, - { - "name": "gamma", - "type": "Literal['scale', 'auto']", - "hasDefault": true, - "default": "'scale'", - "limitation": null, - "ignored": false, - "docstring": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - if ``gamma='scale'`` (default) is passed then it uses 1 / (n_features * X.var()) as value of gamma, - if 'auto', uses 1 / n_features. .. versionchanged:: 0.22 The default value of ``gamma`` changed from 'auto' to 'scale'." - }, - { - "name": "coef0", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-3", - "limitation": null, - "ignored": false, - "docstring": "Tolerance for stopping criterion." 
- }, - { - "name": "nu", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "An upper bound on the fraction of training errors and a lower bound of the fraction of support vectors. Should be in the interval (0, 1]. By default 0.5 will be taken." - }, - { - "name": "shrinking", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use the shrinking heuristic. See the :ref:`User Guide `." - }, - { - "name": "cache_size", - "type": "float", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Specify the size of the kernel cache (in MB)." - }, - { - "name": "verbose", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in libsvm that, if enabled, may not work properly in a multithreaded context." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "-1", - "limitation": null, - "ignored": false, - "docstring": "Hard limit on iterations within solver, or -1 for no limit." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Set of samples, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Per-sample weights. Rescale C per sample. Higher weights force the classifier to put more emphasis on these points." 
- }, - { - "name": "y", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": true, - "docstring": "not used, present for API consistency by convention." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Detects the soft boundary of the set of samples X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Set of samples, where n_samples is the number of samples and\n n_features is the number of features.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Per-sample weights. Rescale C per sample. Higher weights\n force the classifier to put more emphasis on these points.\n\ny : Ignored\n not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n\nNotes\n-----\nIf X is not a C-ordered contiguous array it is copied." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Signed distance to the separating hyperplane.\n\nSigned distance is positive for an inlier and negative for an outlier.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\ndec : ndarray of shape (n_samples,)\n Returns the decision function of the samples." - }, - { - "name": "score_samples", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data matrix." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Raw scoring function of the samples.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\nscore_samples : ndarray of shape (n_samples,)\n Returns the (unshifted) scoring function of the samples." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "For kernel=\"precomputed\", the expected shape of X is (n_samples_test, n_samples_train)." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Perform classification on samples in X.\n\nFor a one-class model, +1 or -1 is returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples_test, n_samples_train)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n Class labels for samples in X." 
- }, - { - "name": "probA_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "probB_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Unsupervised Outlier Detection.\n\nEstimate the support of a high-dimensional distribution.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. If a callable is given it is\n used to precompute the kernel matrix.\n\ndegree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\ntol : float, default=1e-3\n Tolerance for stopping criterion.\n\nnu : float, default=0.5\n An upper bound on the fraction of training\n errors and a lower bound of the fraction of support\n vectors. Should be in the interval (0, 1]. 
By default 0.5\n will be taken.\n\nshrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\ncache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\nverbose : bool, default=False\n Enable verbose output. Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\nmax_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C for each class.\n Computed based on the ``class_weight`` parameter.\n\ncoef_ : ndarray of shape (1, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\ndual_coef_ : ndarray of shape (1, n_SV)\n Coefficients of the support vectors in the decision function.\n\nfit_status_ : int\n 0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (1,)\n Constant in the decision function.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\noffset_ : float\n Offset used to define the decision function from the raw scores.\n We have the relation: decision_function = score_samples - `offset_`.\n The offset is the opposite of `intercept_` and is provided for\n consistency with other outlier detection algorithms.\n\n .. 
versionadded:: 0.20\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\nsupport_ : ndarray of shape (n_SV,)\n Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\nExamples\n--------\n>>> from sklearn.svm import OneClassSVM\n>>> X = [[0], [0.44], [0.45], [0.46], [1]]\n>>> clf = OneClassSVM(gamma='auto').fit(X)\n>>> clf.predict(X)\narray([-1, 1, 1, 1, -1])\n>>> clf.score_samples(X)\narray([1.7798..., 2.0547..., 2.0556..., 2.0561..., 1.7332...])" - } - ], - "functions": [] - }, - { - "name": "sklearn.svm", - "imports": [ - "from _classes import SVC", - "from _classes import NuSVC", - "from _classes import SVR", - "from _classes import NuSVR", - "from _classes import OneClassSVM", - "from _classes import LinearSVC", - "from _classes import LinearSVR", - "from _bounds import l1_min_c" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.svm.tests.test_bounds", - "imports": [ - "import numpy as np", - "from scipy import sparse as sp", - "from scipy import stats", - "import pytest", - "from sklearn.svm._bounds import l1_min_c", - "from sklearn.svm import LinearSVC", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.svm._newrand import set_seed_wrap", - "from sklearn.svm._newrand import bounded_rand_int_wrap", - "from sklearn.utils._testing import assert_raise_message" - ], - "classes": [], - "functions": [ - { - "name": "test_l1_min_c", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_l1_min_c_l2_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_l1_min_c", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ill_posed_min_c", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unsupported_loss", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_newrand_set_seed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that `set_seed` produces deterministic results" - }, - { - "name": "test_newrand_set_seed_overflow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that `set_seed_wrap` is defined for unsigned 32bits ints" - }, - { - "name": "test_newrand_bounded_rand_int", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that `bounded_rand_int` follows a uniform distribution" - }, - { - "name": "test_newrand_bounded_rand_int_limits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that `bounded_rand_int_wrap` is defined for unsigned 32bits ints" - } - ] - }, - { - "name": "sklearn.svm.tests.test_sparse", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal", - "from scipy import sparse", - "from sklearn import datasets", - "from sklearn import svm", - "from sklearn import linear_model", - "from sklearn import base", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import load_digits", - "from sklearn.datasets import make_blobs", - "from sklearn.svm.tests import test_svm", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.utils.extmath import safe_sparse_dot", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import 
skip_if_32bit" - ], - "classes": [], - "functions": [ - { - "name": "check_svm_model_equal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that sparse SVC gives the same result as SVC" - }, - { - "name": "test_unsorted_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svc_with_custom_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svc_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvc_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_liblinear_intercept_handling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_oneclasssvm", - "decorators": [], - "parameters": [], 
- "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_realdata", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_svc_clone_with_callable_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_timeout", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_consistent_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.svm.tests.test_svm", - "imports": [ - "import numpy as np", - "import itertools", - "import pytest", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_almost_equal", - "from numpy.testing import assert_allclose", - "from scipy import sparse", - "from sklearn import svm", - "from sklearn import linear_model", - "from sklearn import datasets", - "from sklearn import metrics", - "from sklearn import base", - "from sklearn.svm import LinearSVC", - "from sklearn.svm import LinearSVR", - "from sklearn.model_selection import train_test_split", - "from sklearn.datasets import make_classification", - "from sklearn.datasets import make_blobs", - "from sklearn.metrics import f1_score", - "from sklearn.metrics.pairwise import rbf_kernel", - "from sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils.validation import _num_samples", - "from sklearn.utils import shuffle", - "from sklearn.exceptions import ConvergenceWarning", - "from 
sklearn.exceptions import NotFittedError", - "from sklearn.exceptions import UndefinedMetricWarning", - "from sklearn.multiclass import OneVsRestClassifier", - "from sklearn.svm import _libsvm", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.utils import compute_class_weight", - "import os" - ], - "classes": [], - "functions": [ - { - "name": "test_libsvm_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_libsvm_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvr_fit_sampleweight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svr_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oneclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oneclass_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_oneclass_score_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tweak_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_probability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decision_function_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svr_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svm_classifier_sided_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svm_regressor_sided_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svm_equivalence_sample_weight_C", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_negative_sample_weights_mask_all_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_negative_weights_svc_leave_just_one_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_negative_weights_svc_leave_two_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_negative_weight_equal_coeffs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_auto_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bad_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svm_gamma_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unicode_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_fit_support_vectors_empty", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvc_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_svx_uppercase_loss_penality_raises_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvc_crammer_singer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvc_fit_sampleweight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_crammer_singer_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvc_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "test_dense_liblinear_intercept_handling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_liblinear_set_coef", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_immutable_coef_property", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvc_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svc_clone_with_callable_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svc_bad_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_timeout", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unfitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_consistent_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_svm_convergence_warnings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svr_coef_sign", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linear_svc_intercept_scaling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lsvc_intercept_scaling_zero", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hasattr_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decision_function_shape_two_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svc_invalid_break_ties_param", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svc_ovr_tie_breaking", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test if predict breaks ties in OVR mode.\nRelated issue: https://github.com/scikit-learn/scikit-learn/issues/8277" - }, - { - "name": "test_gamma_auto", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gamma_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_linearsvm_liblinear_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_support_oneclass_svr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svm_probA_proB_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_custom_kernel_not_array_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test using a custom kernel that is not fed 
with array-like for floats" - } - ] - }, - { - "name": "sklearn.svm.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.tests.test_base", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "import pytest", - "import sklearn", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.base import BaseEstimator", - "from sklearn.base import clone", - "from sklearn.base import is_classifier", - "from sklearn.base import _is_pairwise", - "from sklearn.svm import SVC", - "from sklearn.pipeline import Pipeline", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.decomposition import KernelPCA", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn import datasets", - "from sklearn.base import TransformerMixin", - "from sklearn.utils._mocking import MockDataFrame", - "from sklearn import config_context", - "import pickle", - "from sklearn.feature_selection import SelectFpr", - "from sklearn.feature_selection import f_classif" - ], - "classes": [ - { - "name": "MyEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "K", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "T", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NaNTag", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoNaNTag", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "OverrideTag", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "DiamondOverwriteTag", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "InheritDiamondOverwriteTag", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - }, - { - "name": "ModifyInitParams", - "decorators": 
[], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Deprecated behavior.\nEqual parameters but with a type cast.\nDoesn't fulfill a is a" - }, - { - "name": "Buggy", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A buggy estimator that does not set its parameters right. " - }, - { - "name": "NoEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "VargEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "scikit-learn estimators shouldn't have vargs." 
- }, - { - "name": "TreeBadVersion", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__getstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "TreeNoVersion", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__getstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "DontPickleAttributeMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__getstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__setstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "MultiInheritanceEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "SingleInheritanceEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__getstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_clone", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone_2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone_buggy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone_empty_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone_sparse_matrices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone_estimator_types", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone_class_rather_than_instance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_repr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_str", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_is_classifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_set_params_passes_all_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_params_updates_valid_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_clone_pandas_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle_version_warning_is_not_raised_with_matching_version", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle_version_warning_is_issued_upon_different_version", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle_version_warning_is_issued_when_no_version_info_in_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle_version_no_warning_is_issued_with_non_sklearn_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickling_when_getstate_is_overwritten_by_mixin", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickling_when_getstate_is_overwritten_by_mixin_outside_of_sklearn", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickling_works_when_getstate_is_overwritten_in_the_child_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_tag_inheritance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises_on_get_params_non_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_repr_mimebundle_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_repr_html_wraps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_is_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_build", - "imports": [ - "import os", - "import pytest", - "import textwrap", - "from sklearn import __version__", - "from sklearn.utils._openmp_helpers import _openmp_parallelism_enabled" - ], - "classes": [], - "functions": [ - { - "name": "test_openmp_parallelism_enabled", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_calibration", - "imports": [ - "import pytest", - "import numpy as np", - "from numpy.testing import assert_allclose", - "from scipy import sparse", - "from sklearn.base import BaseEstimator", - "from sklearn.model_selection import LeaveOneOut", - "from sklearn.model_selection import train_test_split", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils.extmath import softmax", - "from sklearn.exceptions import NotFittedError", - 
"from sklearn.datasets import make_classification", - "from sklearn.datasets import make_blobs", - "from sklearn.preprocessing import LabelEncoder", - "from sklearn.model_selection import KFold", - "from sklearn.model_selection import cross_val_predict", - "from sklearn.naive_bayes import MultinomialNB", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.ensemble import RandomForestRegressor", - "from sklearn.svm import LinearSVC", - "from sklearn.isotonic import IsotonicRegression", - "from sklearn.feature_extraction import DictVectorizer", - "from sklearn.pipeline import Pipeline", - "from sklearn.impute import SimpleImputer", - "from sklearn.metrics import brier_score_loss", - "from sklearn.calibration import CalibratedClassifierCV", - "from sklearn.calibration import _sigmoid_calibration", - "from sklearn.calibration import _SigmoidCalibration", - "from sklearn.calibration import calibration_curve" - ], - "classes": [], - "functions": [ - { - "name": "data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_bad_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_default_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_cv_splitter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_parallel_execution", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test parallel calibration" - }, - { - "name": "test_calibration_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_prefit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test calibration for prefitted classifiers" - }, - { - "name": "test_calibration_ensemble_false", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sigmoid_calibration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test calibration values with Platt sigmoid model" - }, - { - "name": "test_calibration_curve", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check calibration_curve function" - }, - { - "name": "test_calibration_nan_imputer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that calibration can accept nan" - }, - { - "name": "test_calibration_prob_sum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_less_classes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_accepts_ndarray", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that calibration accepts n-dimensional arrays as input" - }, - { - "name": "text_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "text_data_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibration_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_calibrated_classifier_cv_deprecation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_check_build", - "imports": [ - "from sklearn.__check_build import raise_build_error", - "from sklearn.utils._testing import assert_raises" - ], - "classes": [], - "functions": [ - { - "name": "test_raise_build_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_common", - "imports": [ - "import os", - "import warnings", - "import sys", - "import re", - "import pkgutil", - "from inspect import isgenerator", - "from functools import partial", - "import pytest", - "from sklearn.utils import all_estimators", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.exceptions import ConvergenceWarning", - "from sklearn.exceptions import FitFailedWarning", - "from sklearn.utils.estimator_checks import check_estimator", - "import sklearn", - "from sklearn.base import BiclusterMixin", - "from sklearn.decomposition import PCA", - "from sklearn.linear_model._base import LinearClassifierMixin", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import Ridge", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.model_selection import RandomizedSearchCV", - "from sklearn.pipeline import make_pipeline", - "from sklearn.utils 
import IS_PYPY", - "from sklearn.utils._testing import SkipTest", - "from sklearn.utils.estimator_checks import _construct_instance", - "from sklearn.utils.estimator_checks import _set_checking_parameters", - "from sklearn.utils.estimator_checks import _get_check_estimator_ids", - "from sklearn.utils.estimator_checks import check_class_weight_balanced_linear_classifier", - "from sklearn.utils.estimator_checks import parametrize_with_checks", - "from sklearn.utils.estimator_checks import check_n_features_in_after_fitting" - ], - "classes": [], - "functions": [ - { - "name": "test_all_estimator_no_base_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sample_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_check_estimator_ids", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_tested_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimators", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_estimator_generate_only", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_configure", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_tested_linear_classifiers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_weight_balanced_linear_classifiers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_import_all_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_root_import_all_completeness", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_all_tests_are_importable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_support_removed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_generate_search_cv_instances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_n_features_in_after_fitting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_config", - "imports": [ - "from sklearn import get_config", - "from sklearn import set_config", - "from sklearn import config_context", - "from sklearn.utils._testing import assert_raises" - ], - "classes": [], - "functions": [ - { - "name": "test_config_context", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_config_context_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_config", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_discriminant_analysis", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy import linalg", - "from 
sklearn.utils import check_random_state", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.datasets import make_blobs", - "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis", - "from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis", - "from sklearn.discriminant_analysis import _cov", - "from sklearn.covariance import ledoit_wolf", - "from sklearn.cluster import KMeans", - "from sklearn.covariance import ShrunkCovariance", - "from sklearn.covariance import LedoitWolf", - "from sklearn.preprocessing import StandardScaler" - ], - "classes": [], - "functions": [ - { - "name": "test_lda_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_priors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_coefs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_explained_variance_ratio", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null 
- }, - { - "name": "test_lda_orthogonality", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_scaling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_store_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_shrinkage", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_ledoitwolf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_dimension_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_dtype_match", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_lda_numeric_consistency_float32_float64", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_qda", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_qda_priors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_qda_store_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_qda_regularization", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_covariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - 
{ - "name": "test_raises_value_error_on_same_number_of_classes_and_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Tests that if the number of samples equals the number\nof classes, a ValueError is raised." - } - ] - }, - { - "name": "sklearn.tests.test_docstring_parameters", - "imports": [ - "import inspect", - "import warnings", - "import importlib", - "from pkgutil import walk_packages", - "from inspect import signature", - "import numpy as np", - "import sklearn", - "from sklearn.utils import IS_PYPY", - "from sklearn.utils._testing import check_docstring_parameters", - "from sklearn.utils._testing import _get_func_name", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils import all_estimators", - "from sklearn.utils.estimator_checks import _enforce_estimator_tags_y", - "from sklearn.utils.estimator_checks import _enforce_estimator_tags_x", - "from sklearn.utils.estimator_checks import _construct_instance", - "from sklearn.utils.deprecation import _is_deprecated", - "from sklearn.externals._pep562 import Pep562", - "from sklearn.datasets import make_classification", - "from sklearn.linear_model import LogisticRegression", - "import pytest", - "from numpydoc import docscrape" - ], - "classes": [], - "functions": [ - { - "name": "test_docstring_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tabs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_construct_searchcv_instance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_docstring_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_dummy", 
- "imports": [ - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "from sklearn.base import clone", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils.stats import _weighted_percentile", - "from sklearn.dummy import DummyClassifier", - "from sklearn.dummy import DummyRegressor", - "from sklearn.exceptions import NotFittedError" - ], - "classes": [], - "functions": [ - { - "name": "_check_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_behavior_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_behavior_2d_for_constant", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_equality_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_most_frequent_and_prior_strategy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_most_frequent_and_prior_strategy_with_2d_column_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_most_frequent_and_prior_strategy_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_strategy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_strategy_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_uniform_strategy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_uniform_strategy_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_string_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_score_with_None", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_prediction_independent_of_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_strategy_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_strategy_multioutput_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_median_strategy_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_median_strategy_multioutput_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - 
}, - { - "name": "test_quantile_strategy_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_strategy_multioutput_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_quantile_strategy_empty_train", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_constant_strategy_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_constant_strategy_multioutput_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_y_mean_attribute_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unknown_strategey_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_constants_not_specified_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_constant_size_multioutput_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_constant_strategy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_constant_strategy_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - 
"name": "test_constant_strategy_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_constant_strategy_sparse_target", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_uniform_strategy_sparse_target_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stratified_strategy_sparse_target", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_most_frequent_and_prior_strategy_sparse_target", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dummy_regressor_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dummy_regressor_on_3D_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dummy_classifier_on_3D_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dummy_regressor_return_std", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_score_with_None", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_prediction_independent_of_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_dtype_of_classifier_probas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_init", - "imports": [ - "from sklearn import *" - ], - "classes": [], - "functions": [ - { - "name": "test_import_skl", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_isotonic", - "imports": [ - "import warnings", - "import numpy as np", - "import pickle", - "import copy", - "import pytest", - "from sklearn.isotonic import check_increasing", - "from sklearn.isotonic import isotonic_regression", - "from sklearn.isotonic import IsotonicRegression", - "from sklearn.isotonic import _make_unique", - "from sklearn.utils.validation import check_array", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils import shuffle", - "from scipy.special import expit" - ], - "classes": [], - "functions": [ - { - "name": "test_permutation_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_increasing_small_number_of_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_increasing_up", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - 
}, - { - "name": "test_check_increasing_up_extreme", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_increasing_down", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_increasing_down_extreme", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_ci_warn", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_ties_min", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_ties_max", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_ties_secondary_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test isotonic regression fit, transform and fit_transform\nagainst the \"secondary\" ties method and \"pituitary\" data from R\n \"isotone\" package, as detailed in: J. d. Leeuw, K. Hornik, P. 
Mair,\n Isotone Optimization in R: Pool-Adjacent-Violators Algorithm\n(PAVA) and Active Set Methods\n\nSet values based on pituitary example and\n the following R command detailed in the paper above:\n> library(\"isotone\")\n> data(\"pituitary\")\n> res1 <- gpava(pituitary$age, pituitary$size, ties=\"secondary\")\n> res1$x\n\n`isotone` version: 1.0-2, 2014-09-07\nR version: R version 3.1.1 (2014-07-10)" - }, - { - "name": "test_isotonic_regression_with_ties_in_differently_sized_groups", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Non-regression test to handle issue 9432:\nhttps://github.com/scikit-learn/scikit-learn/issues/9432\n\nCompare against output in R:\n> library(\"isotone\")\n> x <- c(0, 1, 1, 2, 3, 4)\n> y <- c(0, 0, 1, 0, 0, 1)\n> res1 <- gpava(x, y, ties=\"secondary\")\n> res1$x\n\n`isotone` version: 1.1-0, 2015-07-24\nR version: R version 3.3.2 (2016-10-31)" - }, - { - "name": "test_isotonic_regression_reversed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_auto_decreasing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_auto_increasing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_assert_raises_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_sample_weight_parameter_default_value", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_min_max_boundaries", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_isotonic_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_oob_raise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_oob_clip", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_oob_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_oob_bad", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_oob_bad_after", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_regression_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_duplicate_min_entry", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_ymin_ymax", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_zero_weight_loop", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fast_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_copy_before_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_dtype", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_mismatched_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_unique_dtype", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_unique_tolerance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_make_unique_tolerance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_non_regression_inf_slope", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_thresholds", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_shape_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_isotonic_2darray_more_than_1_feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_kernel_approximation", - "imports": [ - "import numpy as np", - "from scipy.sparse import csr_matrix", - "import pytest", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.metrics.pairwise import kernel_metrics", - "from sklearn.kernel_approximation import RBFSampler", - "from sklearn.kernel_approximation import AdditiveChi2Sampler", - "from sklearn.kernel_approximation import SkewedChi2Sampler", - "from 
sklearn.kernel_approximation import Nystroem", - "from sklearn.kernel_approximation import PolynomialCountSketch", - "from sklearn.metrics.pairwise import polynomial_kernel", - "from sklearn.metrics.pairwise import rbf_kernel", - "from sklearn.metrics.pairwise import chi2_kernel" - ], - "classes": [], - "functions": [ - { - "name": "test_polynomial_count_sketch_raises_if_degree_lower_than_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_polynomial_count_sketch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_linear_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_additive_chi2_sampler", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_skewed_chi2_sampler", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_additive_chi2_sampler_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ensures correct error message" - }, - { - "name": "test_rbf_sampler", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nystroem_approximation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nystroem_default_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nystroem_singular_kernel", 
- "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nystroem_poly_kernel_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nystroem_callable", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nystroem_precomputed_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_kernel_ridge", - "imports": [ - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "from sklearn.datasets import make_regression", - "from sklearn.linear_model import Ridge", - "from sklearn.kernel_ridge import KernelRidge", - "from sklearn.metrics.pairwise import pairwise_kernels", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import assert_array_almost_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_kernel_ridge", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_ridge_csr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_ridge_csc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_ridge_singular_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_ridge_precomputed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_ridge_precomputed_kernel_unchanged", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_ridge_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_ridge_multi_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kernel_ridge_pairwise_is_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_metaestimators", - "imports": [ - "import functools", - "import numpy as np", - "from sklearn.base import BaseEstimator", - "from sklearn.datasets import make_classification", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils.validation import check_is_fitted", - "from sklearn.pipeline import Pipeline", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.model_selection import RandomizedSearchCV", - "from sklearn.feature_selection import RFE", - "from sklearn.feature_selection import RFECV", - "from sklearn.ensemble import BaggingClassifier", - "from sklearn.exceptions import NotFittedError", - "from sklearn.semi_supervised import SelfTrainingClassifier" - ], - "classes": [ - { - "name": "DelegatorData", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_metaestimator_delegation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_min_dependencies_readme", - "imports": [ - "import os", - "import re", - "from pathlib import Path", - "import pytest", - "import sklearn", - "from sklearn._min_dependencies 
import dependent_packages", - "from sklearn.utils.fixes import parse_version" - ], - "classes": [], - "functions": [ - { - "name": "test_min_dependencies_readme", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_multiclass", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "import pytest", - "from re import escape", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_raises_regexp", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._mocking import CheckingClassifier", - "from sklearn.multiclass import OneVsRestClassifier", - "from sklearn.multiclass import OneVsOneClassifier", - "from sklearn.multiclass import OutputCodeClassifier", - "from sklearn.utils.multiclass import check_classification_targets", - "from sklearn.utils.multiclass import type_of_target", - "from sklearn.utils import check_array", - "from sklearn.utils import shuffle", - "from sklearn.metrics import precision_score", - "from sklearn.metrics import recall_score", - "from sklearn.svm import LinearSVC", - "from sklearn.svm import SVC", - "from sklearn.naive_bayes import MultinomialNB", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import Lasso", - "from sklearn.linear_model import ElasticNet", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model import Perceptron", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import SGDClassifier", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.model_selection 
import GridSearchCV", - "from sklearn.model_selection import cross_val_score", - "from sklearn.pipeline import Pipeline", - "from sklearn.pipeline import make_pipeline", - "from sklearn.impute import SimpleImputer", - "from sklearn import svm", - "from sklearn import datasets" - ], - "classes": [], - "functions": [ - { - "name": "test_ovr_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_classification_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_partial_fit_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_ovo_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_fit_predict_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_always_present", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_binary", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_ovr_fit_predict_svc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_multilabel_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_multilabel_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_single_label_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_multilabel_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_single_label_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_coef_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_coef_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_deprecated_coef_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_fit_on_list", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_partial_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_ties", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_ties2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_string_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_one_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_float_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ecoc_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ecoc_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ecoc_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ecoc_float_y", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ecoc_delegate_sparse_base_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_tag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pairwise_cross_val_score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_support_missing_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_multioutput", - "imports": [ - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "from joblib import cpu_count", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn import datasets", - "from sklearn.base import clone", - "from sklearn.datasets import make_classification", - "from sklearn.ensemble import GradientBoostingRegressor", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.exceptions import 
NotFittedError", - "from sklearn.linear_model import Lasso", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import OrthogonalMatchingPursuit", - "from sklearn.linear_model import Ridge", - "from sklearn.linear_model import SGDClassifier", - "from sklearn.linear_model import SGDRegressor", - "from sklearn.metrics import jaccard_score", - "from sklearn.metrics import mean_squared_error", - "from sklearn.multiclass import OneVsRestClassifier", - "from sklearn.multioutput import ClassifierChain", - "from sklearn.multioutput import RegressorChain", - "from sklearn.multioutput import MultiOutputClassifier", - "from sklearn.multioutput import MultiOutputRegressor", - "from sklearn.svm import LinearSVC", - "from sklearn.base import ClassifierMixin", - "from sklearn.utils import shuffle", - "from sklearn.model_selection import GridSearchCV", - "from sklearn.dummy import DummyRegressor", - "from sklearn.dummy import DummyClassifier", - "from sklearn.pipeline import make_pipeline", - "from sklearn.impute import SimpleImputer" - ], - "classes": [ - { - "name": "DummyRegressorWithFitParams", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "DummyClassifierWithFitParams", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_multi_target_regression", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_target_regression_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_target_regression_one_target", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_target_sparse_regression", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_target_sample_weights_api", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_target_sample_weight_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_target_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_output_classification_partial_fit_parallelism", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_hasattr_multi_output_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_output_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_output_classification_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_output_classification_partial_fit_no_first_classes_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_multi_output_classification", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_multioutput_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multiclass_multioutput_estimator_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_output_classification_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_output_classification_partial_fit_sample_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_output_exceptions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "generate_multilabel_dataset_with_correlations", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_chain_fit_and_predict_with_linear_svc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_chain_fit_and_predict_with_sparse_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_chain_vs_independent_models", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_chain_fit_and_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_base_chain_fit_and_predict_with_sparse_data_and_cv", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_chain_random_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_base_chain_crossval_fit_and_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multi_output_classes_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multioutput_estimator_with_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regressor_chain_w_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_support_missing_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_chain_tuple_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classifier_chain_tuple_invalid_order", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_naive_bayes", - "imports": [ - "import pickle", - "from io import BytesIO", - "import numpy as np", - "import scipy.sparse", - "import pytest", - "from sklearn.datasets import load_digits", - "from sklearn.datasets import load_iris", - "from sklearn.model_selection import train_test_split", - "from sklearn.model_selection import cross_val_score", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import 
assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.naive_bayes import GaussianNB", - "from sklearn.naive_bayes import BernoulliNB", - "from sklearn.naive_bayes import MultinomialNB", - "from sklearn.naive_bayes import ComplementNB", - "from sklearn.naive_bayes import CategoricalNB" - ], - "classes": [], - "functions": [ - { - "name": "test_gnb", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gnb_prior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gnb_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test whether sample weights are properly used in GNB. 
" - }, - { - "name": "test_gnb_neg_priors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test whether an error is raised in case of negative priors" - }, - { - "name": "test_gnb_priors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test whether the class prior override is properly used" - }, - { - "name": "test_gnb_priors_sum_isclose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gnb_wrong_nb_priors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test whether an error is raised if the number of prior is different\nfrom the number of class" - }, - { - "name": "test_gnb_prior_greater_one", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test if an error is raised if the sum of prior greater than one" - }, - { - "name": "test_gnb_prior_large_bias", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test if good prediction when class prior favor largely one class" - }, - { - "name": "test_gnb_check_update_with_no_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test when the partial fit is called without any data" - }, - { - "name": "test_gnb_pfit_wrong_nb_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test whether an error is raised when the number of feature changes\nbetween two partial fit" - }, - { - "name": "test_gnb_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gnb_naive_bayes_scale_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_deprecated_coef_intercept", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_prior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_input_check_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_input_check_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_uniform_prior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_provide_prior", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_provide_prior_with_partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_sample_weight_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_discretenb_coef_intercept_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_mnnb", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mnb_prior_unobserved_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mnb_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bnb", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bnb_feature_log_prob", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cnb", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categoricalnb", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categoricalnb_with_min_categories", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_categoricalnb_min_categories_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_alpha", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_alpha_vector", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_accuracy_on_digits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_pipeline", - "imports": [ - "from tempfile import mkdtemp", - "import shutil", - "import 
time", - "import re", - "import itertools", - "import pytest", - "import numpy as np", - "from scipy import sparse", - "import joblib", - "from sklearn.utils.fixes import parse_version", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import MinimalClassifier", - "from sklearn.utils._testing import MinimalRegressor", - "from sklearn.utils._testing import MinimalTransformer", - "from sklearn.base import clone", - "from sklearn.base import is_classifier", - "from sklearn.base import BaseEstimator", - "from sklearn.base import TransformerMixin", - "from sklearn.pipeline import Pipeline", - "from sklearn.pipeline import FeatureUnion", - "from sklearn.pipeline import make_pipeline", - "from sklearn.pipeline import make_union", - "from sklearn.svm import SVC", - "from sklearn.neighbors import LocalOutlierFactor", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.linear_model import Lasso", - "from sklearn.linear_model import LinearRegression", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import r2_score", - "from sklearn.cluster import KMeans", - "from sklearn.feature_selection import SelectKBest", - "from sklearn.feature_selection import f_classif", - "from sklearn.dummy import DummyRegressor", - "from sklearn.decomposition import PCA", - "from sklearn.decomposition import TruncatedSVD", - "from sklearn.datasets import load_iris", - "from sklearn.preprocessing import StandardScaler", - "from sklearn.feature_extraction.text import CountVectorizer", - "from sklearn.experimental import enable_hist_gradient_boosting", - "from 
sklearn.ensemble import HistGradientBoostingClassifier", - "from sklearn.impute import SimpleImputer" - ], - "classes": [ - { - "name": "NoFit", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Small class to test parameter dispatching.\n " - }, - { - "name": "NoTrans", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoInvTransf", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "Transf", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - 
"docstring": null - }, - { - "name": "TransfFitParams", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "Mult", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inverse_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "FitParamT", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null 
- }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mock classifier\n " - }, - { - "name": "DummyTransf", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Transformer which store the column means" - }, - { - "name": "DummyEstimatorParams", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Mock classifier that takes params on predict" - }, - { - "name": "DummyMemory", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "cache", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "WrongDummyMemory", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_pipeline_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "test_pipeline_init_tuple", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_methods_anova", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_sample_weight_supported", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_sample_weight_unsupported", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_raise_set_params_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_methods_pca_svm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_score_samples_pca_lof", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_score_samples_on_pipeline_without_score_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_methods_preprocessing_svm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_predict_on_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fit_predict_on_pipeline_without_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_fit_predict_with_intermediate_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_predict_with_predict_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_union", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_union", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_union_kwargs", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_slice", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_index", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_pipeline_steps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_named_steps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_correctly_adjusts_steps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_pipeline_step_passthrough", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_ducktyping", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_union_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_union_parallel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_union_feature_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classes_property", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_feature_union_steps", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_feature_union_step_drop", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_step_name_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_set_params_nested_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_wrong_memory", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_with_cache_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_pipeline_memory", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_make_pipeline_memory", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_param_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_verbose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_features_in_feature_union", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_union_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_missing_values_leniency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_feature_union_warns_unknown_transformer_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline_get_tags_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_search_cv_using_minimal_compatible_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests.test_random_projection", - "imports": [ - "import functools", - "from typing import List", - "from typing import Any", - "import numpy as np", - "import scipy.sparse as sp", - "import 
pytest", - "from sklearn.metrics import euclidean_distances", - "from sklearn.random_projection import johnson_lindenstrauss_min_dim", - "from sklearn.random_projection import _gaussian_random_matrix", - "from sklearn.random_projection import _sparse_random_matrix", - "from sklearn.random_projection import SparseRandomProjection", - "from sklearn.random_projection import GaussianRandomProjection", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.exceptions import DataDimensionalityWarning" - ], - "classes": [], - "functions": [ - { - "name": "make_sparse_random_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "densify", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_invalid_jl_domain", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_input_size_jl_min_dim", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_input_size_random_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_size_generated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_zero_mean_and_unit_norm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_input_with_sparse_random_matrix", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_basic_property_of_random_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_basic_property_of_sparse_random_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gaussian_random_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_random_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_random_projection_transformer_invalid_density", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_projection_transformer_invalid_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_try_to_transform_before_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_too_many_samples_to_find_a_safe_embedding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_projection_embedding_quality", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_SparseRandomProjection_output_representation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_correct_RandomProjection_dimensions_embedding", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_warning_n_components_greater_than_n_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_works_with_sparse_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.tree.setup", - "imports": [ - "import os", - "import numpy", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tree._classes", - "imports": [ - "import numbers", - "import warnings", - "from abc import ABCMeta", - "from abc import abstractmethod", - "from math import ceil", - "import numpy as np", - "from scipy.sparse import issparse", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from base import clone", - "from base import RegressorMixin", - "from base import is_classifier", - "from base import MultiOutputMixin", - "from utils import Bunch", - "from utils import check_random_state", - "from utils.validation import _check_sample_weight", - "from utils import compute_sample_weight", - "from utils.multiclass import check_classification_targets", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from _criterion import Criterion", - "from _splitter import Splitter", - "from _tree import DepthFirstTreeBuilder", - "from _tree import BestFirstTreeBuilder", - "from _tree import Tree", - "from _tree import _build_pruned_tree_ccp", - "from _tree import ccp_pruning_path", - "from None import _tree", - "from None import _splitter", - "from None 
import _criterion" - ], - "classes": [ - { - "name": "BaseDecisionTree", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the depth of the decision tree.\n\nThe depth of a tree is the maximum distance between the root\nand any leaf.\n\nReturns\n-------\nself.tree_.max_depth : int\n The maximum depth of the tree." - }, - { - "name": "get_n_leaves", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the number of leaves of the decision tree.\n\nReturns\n-------\nself.tree_.n_leaves : int\n Number of leaves." - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_X_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate the training data on predict (probabilities)." - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Allow to bypass several input checking. Don't use this parameter unless you know what you do." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class or regression value for X.\n\nFor a classification model, the predicted class for each sample in X is\nreturned. 
For a regression model, the predicted value based on X is\nreturned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nReturns\n-------\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The predicted classes, or the predict values." - }, - { - "name": "apply", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Allow to bypass several input checking. Don't use this parameter unless you know what you do." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the index of the leaf that each sample is predicted as.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nReturns\n-------\nX_leaves : array-like of shape (n_samples,)\n For each datapoint x in X, return the index of the leaf x\n ends up in. Leaves are numbered within\n ``[0; self.tree_.node_count)``, possibly with gaps in the\n numbering." 
- }, - { - "name": "decision_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Allow to bypass several input checking. Don't use this parameter unless you know what you do." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Return the decision path in the tree.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nReturns\n-------\nindicator : sparse matrix of shape (n_samples, n_nodes)\n Return a node indicator CSR matrix where non zero elements\n indicates that the samples goes through the nodes." - }, - { - "name": "_prune_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Prune tree using Minimal Cost-Complexity Pruning." - }, - { - "name": "cost_complexity_pruning_path", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csc_matrix``." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels) as integers or strings." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Splits that would create child nodes with net zero or negative weight are ignored while searching for a split in each node. Splits are also ignored if they would result in any single class carrying a negative weight in either child node." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute the pruning path during Minimal Cost-Complexity Pruning.\n\nSee :ref:`minimal_cost_complexity_pruning` for details on the pruning\nprocess.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csc_matrix``.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels) as integers or strings.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. Splits are also\n ignored if they would result in any single class carrying a\n negative weight in either child node.\n\nReturns\n-------\nccp_path : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n ccp_alphas : ndarray\n Effective alphas of subtree during pruning.\n\n impurities : ndarray\n Sum of the impurities of the subtree leaves for the\n corresponding alpha value in ``ccp_alphas``." 
- }, - { - "name": "feature_importances_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the feature importances.\n\nThe importance of a feature is computed as the (normalized) total\nreduction of the criterion brought by that feature.\nIt is also known as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). See\n:func:`sklearn.inspection.permutation_importance` as an alternative.\n\nReturns\n-------\nfeature_importances_ : ndarray of shape (n_features,)\n Normalized total reduction of criteria by feature\n (Gini importance)." - } - ], - "docstring": "Base class for decision trees.\n\nWarning: This class should not be used directly.\nUse derived classes instead." - }, - { - "name": "DecisionTreeClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "criterion", - "type": "Literal[\"gini\", \"entropy\"]", - "hasDefault": true, - "default": "\"gini\"", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are \"gini\" for the Gini impurity and \"entropy\" for the information gain." - }, - { - "name": "splitter", - "type": "Literal[\"best\", \"random\"]", - "hasDefault": true, - "default": "\"best\"", - "limitation": null, - "ignored": false, - "docstring": "The strategy used to choose the split at each node. Supported strategies are \"best\" to choose the best split and \"random\" to choose the best random split." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." 
- }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." - }, - { - "name": "max_features", - "type": "Union[Literal[\"auto\", \"sqrt\", \"log2\"], int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. 
- If float, then `max_features` is a fraction and `int(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=sqrt(n_features)`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of the estimator. The features are always randomly permuted at each split, even if ``splitter`` is set to ``\"best\"``. When ``max_features < n_features``, the algorithm will select ``max_features`` at random at each split before finding the best split among them. But the best found split may vary across different runs, even if ``max_features=n_features``. That is the case, if the improvement of the criterion is identical for several splits and one split has to be selected at random. To obtain a deterministic behaviour during fitting, ``random_state`` has to be fixed to an integer. See :term:`Glossary ` for details." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow a tree with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. 
The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "class_weight", - "type": "Union[Dict, List[Dict], Literal[\"balanced\"]]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If None, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. Note that for multioutput (including multilabel) weights should be defined for each class of every column in its own dict. For example, for four-class multilabel classification weights should be [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of [{1:1}, {2:5}, {3:1}, {4:1}]. 
The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` For multi-output, the weights of each column of y will be multiplied. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified." - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csc_matrix``." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (class labels) as integers or strings." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Splits that would create child nodes with net zero or negative weight are ignored while searching for a split in each node. Splits are also ignored if they would result in any single class carrying a negative weight in either child node." 
- }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Allow to bypass several input checking. Don't use this parameter unless you know what you do." - }, - { - "name": "X_idx_sorted", - "type": null, - "hasDefault": true, - "default": "\"deprecated\"", - "limitation": null, - "ignored": false, - "docstring": "This parameter is deprecated and has no effect. It will be removed in 1.1 (renaming of 0.26). .. deprecated :: 0.24" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a decision tree classifier from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csc_matrix``.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels) as integers or strings.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. Splits are also\n ignored if they would result in any single class carrying a\n negative weight in either child node.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nX_idx_sorted : deprecated, default=\"deprecated\"\n This parameter is deprecated and has no effect.\n It will be removed in 1.1 (renaming of 0.26).\n\n .. deprecated :: 0.24\n\nReturns\n-------\nself : DecisionTreeClassifier\n Fitted estimator." 
- }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Allow to bypass several input checking. Don't use this parameter unless you know what you do." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class probabilities of the input samples X.\n\nThe predicted class probability is the fraction of samples of the same\nclass in a leaf.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nReturns\n-------\nproba : ndarray of shape (n_samples, n_classes) or list of n_outputs such arrays if n_outputs > 1\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - }, - { - "name": "predict_log_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict class log-probabilities of the input samples X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nproba : ndarray of shape (n_samples, n_classes) or list of n_outputs such arrays if n_outputs > 1\n The class log-probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`." - } - ], - "docstring": "A decision tree classifier.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncriterion : {\"gini\", \"entropy\"}, default=\"gini\"\n The function to measure the quality of a split. Supported criteria are\n \"gini\" for the Gini impurity and \"entropy\" for the information gain.\n\nsplitter : {\"best\", \"random\"}, default=\"best\"\n The strategy used to choose the split at each node. Supported\n strategies are \"best\" to choose the best split and \"random\" to choose\n the best random split.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. 
versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : int, float or {\"auto\", \"sqrt\", \"log2\"}, default=None\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the estimator. The features are always\n randomly permuted at each split, even if ``splitter`` is set to\n ``\"best\"``. 
When ``max_features < n_features``, the algorithm will\n select ``max_features`` at random at each split before finding the best\n split among them. But the best found split may vary across different\n runs, even if ``max_features=n_features``. That is the case, if the\n improvement of the criterion is identical for several splits and one\n split has to be selected at random. To obtain a deterministic behaviour\n during fitting, ``random_state`` has to be fixed to an integer.\n See :term:`Glossary ` for details.\n\nmax_leaf_nodes : int, default=None\n Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=0\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. 
The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nclass_weight : dict, list of dict or \"balanced\", default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If None, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n For multi-output, the weights of each column of y will be multiplied.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,) or list of ndarray\n The classes labels (single output problem),\n or a list of arrays of class labels (multi-output problem).\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. 
It is also\n known as the Gini importance [4]_.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nmax_features_ : int\n The inferred value of max_features.\n\nn_classes_ : int or list of int\n The number of classes (for single output problems),\n or a list containing the number of classes for each\n output (for multi-output problems).\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n The underlying Tree object. Please refer to\n ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n for basic usage of these attributes.\n\nSee Also\n--------\nDecisionTreeRegressor : A decision tree regressor.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nThe :meth:`predict` method operates using the :func:`numpy.argmax`\nfunction on the outputs of :meth:`predict_proba`. This means that in\ncase the highest predicted probabilities are tied, the classifier will\npredict the tied class with the lowest index in :term:`classes_`.\n\nReferences\n----------\n\n.. [1] https://en.wikipedia.org/wiki/Decision_tree_learning\n\n.. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, \"Classification\n and Regression Trees\", Wadsworth, Belmont, CA, 1984.\n\n.. [3] T. Hastie, R. Tibshirani and J. Friedman. \"Elements of Statistical\n Learning\", Springer, 2009.\n\n.. [4] L. Breiman, and A. 
Cutler, \"Random Forests\",\n https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import cross_val_score\n>>> from sklearn.tree import DecisionTreeClassifier\n>>> clf = DecisionTreeClassifier(random_state=0)\n>>> iris = load_iris()\n>>> cross_val_score(clf, iris.data, iris.target, cv=10)\n... # doctest: +SKIP\n...\narray([ 1. , 0.93..., 0.86..., 0.93..., 0.93...,\n 0.93..., 0.93..., 1. , 0.93..., 1. ])" - }, - { - "name": "DecisionTreeRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "criterion", - "type": "Literal[\"mse\", \"friedman_mse\", \"mae\", \"poisson\"]", - "hasDefault": true, - "default": "\"mse\"", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are \"mse\" for the mean squared error, which is equal to variance reduction as feature selection criterion and minimizes the L2 loss using the mean of each terminal node, \"friedman_mse\", which uses mean squared error with Friedman's improvement score for potential splits, \"mae\" for the mean absolute error, which minimizes the L1 loss using the median of each terminal node, and \"poisson\" which uses reduction in Poisson deviance to find splits. .. versionadded:: 0.18 Mean Absolute Error (MAE) criterion. .. versionadded:: 0.24 Poisson deviance criterion." - }, - { - "name": "splitter", - "type": "Literal[\"best\", \"random\"]", - "hasDefault": true, - "default": "\"best\"", - "limitation": null, - "ignored": false, - "docstring": "The strategy used to choose the split at each node. Supported strategies are \"best\" to choose the best split and \"random\" to choose the best random split." 
- }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." 
- }, - { - "name": "max_features", - "type": "Union[Literal[\"auto\", \"sqrt\", \"log2\"], int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `int(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=n_features`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of the estimator. The features are always randomly permuted at each split, even if ``splitter`` is set to ``\"best\"``. When ``max_features < n_features``, the algorithm will select ``max_features`` at random at each split before finding the best split among them. But the best found split may vary across different runs, even if ``max_features=n_features``. That is the case, if the improvement of the criterion is identical for several splits and one split has to be selected at random. To obtain a deterministic behaviour during fitting, ``random_state`` has to be fixed to an integer. See :term:`Glossary ` for details." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow a tree with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." 
- }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The training input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csc_matrix``." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The target values (real numbers). Use ``dtype=np.float64`` and ``order='C'`` for maximum efficiency." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted. Splits that would create child nodes with net zero or negative weight are ignored while searching for a split in each node." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Allow to bypass several input checking. Don't use this parameter unless you know what you do." - }, - { - "name": "X_idx_sorted", - "type": null, - "hasDefault": true, - "default": "\"deprecated\"", - "limitation": null, - "ignored": false, - "docstring": "This parameter is deprecated and has no effect. It will be removed in 1.1 (renaming of 0.26). .. deprecated :: 0.24" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a decision tree regressor from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. 
Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csc_matrix``.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (real numbers). Use ``dtype=np.float64`` and\n ``order='C'`` for maximum efficiency.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nX_idx_sorted : deprecated, default=\"deprecated\"\n This parameter is deprecated and has no effect.\n It will be removed in 1.1 (renaming of 0.26).\n\n .. deprecated :: 0.24\n\nReturns\n-------\nself : DecisionTreeRegressor\n Fitted estimator." - }, - { - "name": "_compute_partial_dependence_recursion", - "decorators": [], - "parameters": [ - { - "name": "grid", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The grid points on which the partial dependence should be evaluated." - }, - { - "name": "target_features", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The set of target features for which the partial dependence should be evaluated." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fast partial dependence computation.\n\nParameters\n----------\ngrid : ndarray of shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\ntarget_features : ndarray of shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\n\nReturns\n-------\naveraged_predictions : ndarray of shape (n_samples,)\n The value of the partial dependence function on each grid point." - } - ], - "docstring": "A decision tree regressor.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncriterion : {\"mse\", \"friedman_mse\", \"mae\", \"poisson\"}, default=\"mse\"\n The function to measure the quality of a split. Supported criteria\n are \"mse\" for the mean squared error, which is equal to variance\n reduction as feature selection criterion and minimizes the L2 loss\n using the mean of each terminal node, \"friedman_mse\", which uses mean\n squared error with Friedman's improvement score for potential splits,\n \"mae\" for the mean absolute error, which minimizes the L1 loss using\n the median of each terminal node, and \"poisson\" which uses reduction in\n Poisson deviance to find splits.\n\n .. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\n .. versionadded:: 0.24\n Poisson deviance criterion.\n\nsplitter : {\"best\", \"random\"}, default=\"best\"\n The strategy used to choose the split at each node. Supported\n strategies are \"best\" to choose the best split and \"random\" to choose\n the best random split.\n\nmax_depth : int, default=None\n The maximum depth of the tree. 
If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. 
Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : int, float or {\"auto\", \"sqrt\", \"log2\"}, default=None\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the estimator. The features are always\n randomly permuted at each split, even if ``splitter`` is set to\n ``\"best\"``. When ``max_features < n_features``, the algorithm will\n select ``max_features`` at random at each split before finding the best\n split among them. But the best found split may vary across different\n runs, even if ``max_features=n_features``. That is the case, if the\n improvement of the criterion is identical for several splits and one\n split has to be selected at random. 
To obtain a deterministic behaviour\n during fitting, ``random_state`` has to be fixed to an integer.\n See :term:`Glossary ` for details.\n\nmax_leaf_nodes : int, default=None\n Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=0\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. 
versionadded:: 0.22\n\nAttributes\n----------\nfeature_importances_ : ndarray of shape (n_features,)\n The feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the\n (normalized) total reduction of the criterion brought\n by that feature. It is also known as the Gini importance [4]_.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nmax_features_ : int\n The inferred value of max_features.\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n The underlying Tree object. Please refer to\n ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n for basic usage of these attributes.\n\nSee Also\n--------\nDecisionTreeClassifier : A decision tree classifier.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n\n.. [1] https://en.wikipedia.org/wiki/Decision_tree_learning\n\n.. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, \"Classification\n and Regression Trees\", Wadsworth, Belmont, CA, 1984.\n\n.. [3] T. Hastie, R. Tibshirani and J. Friedman. \"Elements of Statistical\n Learning\", Springer, 2009.\n\n.. [4] L. Breiman, and A. 
Cutler, \"Random Forests\",\n https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.model_selection import cross_val_score\n>>> from sklearn.tree import DecisionTreeRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> regressor = DecisionTreeRegressor(random_state=0)\n>>> cross_val_score(regressor, X, y, cv=10)\n... # doctest: +SKIP\n...\narray([-0.39..., -0.46..., 0.02..., 0.06..., -0.50...,\n 0.16..., 0.11..., -0.73..., -0.30..., -0.00...])" - }, - { - "name": "ExtraTreeClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "criterion", - "type": "Literal[\"gini\", \"entropy\"]", - "hasDefault": true, - "default": "\"gini\"", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are \"gini\" for the Gini impurity and \"entropy\" for the information gain." - }, - { - "name": "splitter", - "type": "Literal[\"random\", \"best\"]", - "hasDefault": true, - "default": "\"random\"", - "limitation": null, - "ignored": false, - "docstring": "The strategy used to choose the split at each node. Supported strategies are \"best\" to choose the best split and \"random\" to choose the best random split." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. 
- If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." - }, - { - "name": "max_features", - "type": "Union[Literal[\"auto\", \"sqrt\", \"log2\"], int]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `int(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=sqrt(n_features)`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. 
Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to pick randomly the `max_features` used at each split. See :term:`Glossary ` for details." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow a tree with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. 
The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "class_weight", - "type": "Union[Dict, List[Dict], Literal[\"balanced\"]]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If None, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. Note that for multioutput (including multilabel) weights should be defined for each class of every column in its own dict. For example, for four-class multilabel classification weights should be [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of [{1:1}, {2:5}, {3:1}, {4:1}]. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` For multi-output, the weights of each column of y will be multiplied. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified." - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "An extremely randomized tree classifier.\n\nExtra-trees differ from classic decision trees in the way they are built.\nWhen looking for the best split to separate the samples of a node into two\ngroups, random splits are drawn for each of the `max_features` randomly\nselected features and the best split among those is chosen. When\n`max_features` is set 1, this amounts to building a totally random\ndecision tree.\n\nWarning: Extra-trees should only be used within ensemble methods.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncriterion : {\"gini\", \"entropy\"}, default=\"gini\"\n The function to measure the quality of a split. Supported criteria are\n \"gini\" for the Gini impurity and \"entropy\" for the information gain.\n\nsplitter : {\"random\", \"best\"}, default=\"random\"\n The strategy used to choose the split at each node. Supported\n strategies are \"best\" to choose the best split and \"random\" to choose\n the best random split.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. 
This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : int, float, {\"auto\", \"sqrt\", \"log2\"} or None, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n Used to pick randomly the `max_features` used at each split.\n See :term:`Glossary ` for details.\n\nmax_leaf_nodes : int, default=None\n Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - 
N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nclass_weight : dict, list of dict or \"balanced\", default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If None, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n For multi-output, the weights of each column of y will be multiplied.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. 
The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,) or list of ndarray\n The classes labels (single output problem),\n or a list of arrays of class labels (multi-output problem).\n\nmax_features_ : int\n The inferred value of max_features.\n\nn_classes_ : int or list of int\n The number of classes (for single output problems),\n or a list containing the number of classes for each\n output (for multi-output problems).\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n The underlying Tree object. Please refer to\n ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n for basic usage of these attributes.\n\nSee Also\n--------\nExtraTreeRegressor : An extremely randomized tree regressor.\nsklearn.ensemble.ExtraTreesClassifier : An extra-trees classifier.\nsklearn.ensemble.ExtraTreesRegressor : An extra-trees regressor.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) 
lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.ensemble import BaggingClassifier\n>>> from sklearn.tree import ExtraTreeClassifier\n>>> X, y = load_iris(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, random_state=0)\n>>> extra_tree = ExtraTreeClassifier(random_state=0)\n>>> cls = BaggingClassifier(extra_tree, random_state=0).fit(\n... X_train, y_train)\n>>> cls.score(X_test, y_test)\n0.8947..." - }, - { - "name": "ExtraTreeRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "criterion", - "type": "Literal[\"mse\", \"friedman_mse\", \"mae\"]", - "hasDefault": true, - "default": "\"mse\"", - "limitation": null, - "ignored": false, - "docstring": "The function to measure the quality of a split. Supported criteria are \"mse\" for the mean squared error, which is equal to variance reduction as feature selection criterion and \"mae\" for the mean absolute error. .. versionadded:: 0.18 Mean Absolute Error (MAE) criterion. .. versionadded:: 0.24 Poisson deviance criterion." - }, - { - "name": "splitter", - "type": "Literal[\"random\", \"best\"]", - "hasDefault": true, - "default": "\"random\"", - "limitation": null, - "ignored": false, - "docstring": "The strategy used to choose the split at each node. Supported strategies are \"best\" to choose the best split and \"random\" to choose the best random split." 
- }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples." - }, - { - "name": "min_samples_split", - "type": "Union[float, int]", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_samples_leaf", - "type": "Union[float, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions." - }, - { - "name": "min_weight_fraction_leaf", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided." 
- }, - { - "name": "max_features", - "type": "Union[Literal[\"auto\", \"sqrt\", \"log2\"], int]", - "hasDefault": true, - "default": "\"auto\"", - "limitation": null, - "ignored": false, - "docstring": "The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `int(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=n_features`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to pick randomly the `max_features` used at each split. See :term:`Glossary ` for details." - }, - { - "name": "min_impurity_decrease", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. 
versionadded:: 0.19" - }, - { - "name": "min_impurity_split", - "type": "float", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of ``min_impurity_decrease`` in 0.19. The default value of ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it will be removed in 1.0 (renaming of 0.25). Use ``min_impurity_decrease`` instead." - }, - { - "name": "max_leaf_nodes", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Grow a tree with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes." - }, - { - "name": "ccp_alpha", - "type": null, - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "An extremely randomized tree regressor.\n\nExtra-trees differ from classic decision trees in the way they are built.\nWhen looking for the best split to separate the samples of a node into two\ngroups, random splits are drawn for each of the `max_features` randomly\nselected features and the best split among those is chosen. 
When\n`max_features` is set 1, this amounts to building a totally random\ndecision tree.\n\nWarning: Extra-trees should only be used within ensemble methods.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncriterion : {\"mse\", \"friedman_mse\", \"mae\"}, default=\"mse\"\n The function to measure the quality of a split. Supported criteria\n are \"mse\" for the mean squared error, which is equal to variance\n reduction as feature selection criterion and \"mae\" for the mean\n absolute error.\n\n .. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\n .. versionadded:: 0.24\n Poisson deviance criterion.\n\nsplitter : {\"random\", \"best\"}, default=\"random\"\n The strategy used to choose the split at each node. Supported\n strategies are \"best\" to choose the best split and \"random\" to choose\n the best random split.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. 
This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : int, float, {\"auto\", \"sqrt\", \"log2\"} or None, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n Used to pick randomly the `max_features` used at each split.\n See :term:`Glossary ` for details.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the 
number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nmax_leaf_nodes : int, default=None\n Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nmax_features_ : int\n The inferred value of max_features.\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nfeature_importances_ : ndarray of shape (n_features,)\n Return impurity-based feature importances (the higher, the more\n important the feature).\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n The underlying Tree object. 
Please refer to\n ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n for basic usage of these attributes.\n\nSee Also\n--------\nExtraTreeClassifier : An extremely randomized tree classifier.\nsklearn.ensemble.ExtraTreesClassifier : An extra-trees classifier.\nsklearn.ensemble.ExtraTreesRegressor : An extra-trees regressor.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.ensemble import BaggingRegressor\n>>> from sklearn.tree import ExtraTreeRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, random_state=0)\n>>> extra_tree = ExtraTreeRegressor(random_state=0)\n>>> reg = BaggingRegressor(extra_tree, random_state=0).fit(\n... X_train, y_train)\n>>> reg.score(X_test, y_test)\n0.33..." 
- } - ], - "functions": [] - }, - { - "name": "sklearn.tree._export", - "imports": [ - "from io import StringIO", - "from numbers import Integral", - "import numpy as np", - "from utils.validation import check_is_fitted", - "from utils.validation import _deprecate_positional_args", - "from base import is_classifier", - "from None import _criterion", - "from None import _tree", - "from _reingold_tilford import buchheim", - "from _reingold_tilford import Tree", - "from None import DecisionTreeClassifier", - "import warnings", - "import matplotlib.pyplot as plt", - "from matplotlib.text import Annotation" - ], - "classes": [ - { - "name": "Sentinel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "_BaseTreeExporter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_color", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_fill_color", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "node_to_str", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "_DOTTreeExporter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "export", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "tail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "head", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "recurse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "_MPLTreeExporter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_make_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "export", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "recurse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "_color_brew", - "decorators": [], - "parameters": [ - { - "name": "n", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of colors required." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate n colors with equally spaced hues.\n\nParameters\n----------\nn : int\n The number of colors required.\n\nReturns\n-------\ncolor_list : list, length n\n List of n tuples of form (R, G, B) being the components of each color." 
- }, - { - "name": "plot_tree", - "decorators": [], - "parameters": [ - { - "name": "decision_tree", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The decision tree to be plotted." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the representation. If None, the tree is fully generated." - }, - { - "name": "feature_names", - "type": "List", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Names of each of the features. If None, generic names will be used (\"X[0]\", \"X[1]\", ...)." - }, - { - "name": "class_names", - "type": "Union[List[str], bool]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Names of each of the target classes in ascending numerical order. Only relevant for classification and not supported for multi-output. If ``True``, shows a symbolic representation of the class name." - }, - { - "name": "label", - "type": "Literal['all', 'root', 'none']", - "hasDefault": true, - "default": "'all'", - "limitation": null, - "ignored": false, - "docstring": "Whether to show informative labels for impurity, etc. Options include 'all' to show at every node, 'root' to show only at the top root node, or 'none' to not show at any node." - }, - { - "name": "filled", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, paint nodes to indicate majority class for classification, extremity of values for regression, or purity of node for multi-output." - }, - { - "name": "impurity", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, show the impurity at each node." 
- }, - { - "name": "node_ids", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, show the ID number on each node." - }, - { - "name": "proportion", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, change the display of 'values' and/or 'samples' to be proportions and percentages respectively." - }, - { - "name": "rotate", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "This parameter has no effect on the matplotlib tree visualisation and it is kept here for backward compatibility. .. deprecated:: 0.23 ``rotate`` is deprecated in 0.23 and will be removed in 1.0 (renaming of 0.25)." - }, - { - "name": "rounded", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, draw node boxes with rounded corners and use Helvetica fonts instead of Times-Roman." - }, - { - "name": "precision", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of digits of precision for floating point in the values of impurity, threshold and value attributes of each node." - }, - { - "name": "ax", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axes to plot to. If None, use current axis. Any previous content is cleared." - }, - { - "name": "fontsize", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Size of text font. If None, determined automatically to fit figure." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Plot a decision tree.\n\nThe sample counts that are shown are weighted with any sample_weights that\nmight be present.\n\nThe visualization is fit automatically to the size of the axis.\nUse the ``figsize`` or ``dpi`` arguments of ``plt.figure`` to control\nthe size of the rendering.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\nParameters\n----------\ndecision_tree : decision tree regressor or classifier\n The decision tree to be plotted.\n\nmax_depth : int, default=None\n The maximum depth of the representation. If None, the tree is fully\n generated.\n\nfeature_names : list of strings, default=None\n Names of each of the features.\n If None, generic names will be used (\"X[0]\", \"X[1]\", ...).\n\nclass_names : list of str or bool, default=None\n Names of each of the target classes in ascending numerical order.\n Only relevant for classification and not supported for multi-output.\n If ``True``, shows a symbolic representation of the class name.\n\nlabel : {'all', 'root', 'none'}, default='all'\n Whether to show informative labels for impurity, etc.\n Options include 'all' to show at every node, 'root' to show only at\n the top root node, or 'none' to not show at any node.\n\nfilled : bool, default=False\n When set to ``True``, paint nodes to indicate majority class for\n classification, extremity of values for regression, or purity of node\n for multi-output.\n\nimpurity : bool, default=True\n When set to ``True``, show the impurity at each node.\n\nnode_ids : bool, default=False\n When set to ``True``, show the ID number on each node.\n\nproportion : bool, default=False\n When set to ``True``, change the display of 'values' and/or 'samples'\n to be proportions and percentages respectively.\n\nrotate : bool, default=False\n This parameter has no effect on the matplotlib tree visualisation and\n it is kept here for backward compatibility.\n\n .. 
deprecated:: 0.23\n ``rotate`` is deprecated in 0.23 and will be removed in 1.0\n (renaming of 0.25).\n\nrounded : bool, default=False\n When set to ``True``, draw node boxes with rounded corners and use\n Helvetica fonts instead of Times-Roman.\n\nprecision : int, default=3\n Number of digits of precision for floating point in the values of\n impurity, threshold and value attributes of each node.\n\nax : matplotlib axis, default=None\n Axes to plot to. If None, use current axis. Any previous content\n is cleared.\n\nfontsize : int, default=None\n Size of text font. If None, determined automatically to fit figure.\n\nReturns\n-------\nannotations : list of artists\n List containing the artists for the annotation boxes making up the\n tree.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn import tree\n\n>>> clf = tree.DecisionTreeClassifier(random_state=0)\n>>> iris = load_iris()\n\n>>> clf = clf.fit(iris.data, iris.target)\n>>> tree.plot_tree(clf) # doctest: +SKIP\n[Text(251.5,345.217,'X[3] <= 0.8..." - }, - { - "name": "export_graphviz", - "decorators": [], - "parameters": [ - { - "name": "decision_tree", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The decision tree to be exported to GraphViz." - }, - { - "name": "out_file", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Handle or name of the output file. If ``None``, the result is returned as a string. .. versionchanged:: 0.20 Default of out_file changed from \"tree.dot\" to None." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum depth of the representation. If None, the tree is fully generated." 
- }, - { - "name": "feature_names", - "type": "List[str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Names of each of the features. If None generic names will be used (\"feature_0\", \"feature_1\", ...)." - }, - { - "name": "class_names", - "type": "Union[List[str], bool]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Names of each of the target classes in ascending numerical order. Only relevant for classification and not supported for multi-output. If ``True``, shows a symbolic representation of the class name." - }, - { - "name": "label", - "type": "Literal['all', 'root', 'none']", - "hasDefault": true, - "default": "'all'", - "limitation": null, - "ignored": false, - "docstring": "Whether to show informative labels for impurity, etc. Options include 'all' to show at every node, 'root' to show only at the top root node, or 'none' to not show at any node." - }, - { - "name": "filled", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, paint nodes to indicate majority class for classification, extremity of values for regression, or purity of node for multi-output." - }, - { - "name": "leaves_parallel", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, draw all leaf nodes at the bottom of the tree." - }, - { - "name": "impurity", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, show the impurity at each node." - }, - { - "name": "node_ids", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, show the ID number on each node." 
- }, - { - "name": "proportion", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, change the display of 'values' and/or 'samples' to be proportions and percentages respectively." - }, - { - "name": "rotate", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, orient tree left to right rather than top-down." - }, - { - "name": "rounded", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``True``, draw node boxes with rounded corners and use Helvetica fonts instead of Times-Roman." - }, - { - "name": "special_characters", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When set to ``False``, ignore special characters for PostScript compatibility." - }, - { - "name": "precision", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of digits of precision for floating point in the values of impurity, threshold and value attributes of each node." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Export a decision tree in DOT format.\n\nThis function generates a GraphViz representation of the decision tree,\nwhich is then written into `out_file`. Once exported, graphical renderings\ncan be generated using, for example::\n\n $ dot -Tps tree.dot -o tree.ps (PostScript format)\n $ dot -Tpng tree.dot -o tree.png (PNG format)\n\nThe sample counts that are shown are weighted with any sample_weights that\nmight be present.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndecision_tree : decision tree classifier\n The decision tree to be exported to GraphViz.\n\nout_file : object or str, default=None\n Handle or name of the output file. 
If ``None``, the result is\n returned as a string.\n\n .. versionchanged:: 0.20\n Default of out_file changed from \"tree.dot\" to None.\n\nmax_depth : int, default=None\n The maximum depth of the representation. If None, the tree is fully\n generated.\n\nfeature_names : list of str, default=None\n Names of each of the features.\n If None generic names will be used (\"feature_0\", \"feature_1\", ...).\n\nclass_names : list of str or bool, default=None\n Names of each of the target classes in ascending numerical order.\n Only relevant for classification and not supported for multi-output.\n If ``True``, shows a symbolic representation of the class name.\n\nlabel : {'all', 'root', 'none'}, default='all'\n Whether to show informative labels for impurity, etc.\n Options include 'all' to show at every node, 'root' to show only at\n the top root node, or 'none' to not show at any node.\n\nfilled : bool, default=False\n When set to ``True``, paint nodes to indicate majority class for\n classification, extremity of values for regression, or purity of node\n for multi-output.\n\nleaves_parallel : bool, default=False\n When set to ``True``, draw all leaf nodes at the bottom of the tree.\n\nimpurity : bool, default=True\n When set to ``True``, show the impurity at each node.\n\nnode_ids : bool, default=False\n When set to ``True``, show the ID number on each node.\n\nproportion : bool, default=False\n When set to ``True``, change the display of 'values' and/or 'samples'\n to be proportions and percentages respectively.\n\nrotate : bool, default=False\n When set to ``True``, orient tree left to right rather than top-down.\n\nrounded : bool, default=False\n When set to ``True``, draw node boxes with rounded corners and use\n Helvetica fonts instead of Times-Roman.\n\nspecial_characters : bool, default=False\n When set to ``False``, ignore special characters for PostScript\n compatibility.\n\nprecision : int, default=3\n Number of digits of precision for floating point in the 
values of\n impurity, threshold and value attributes of each node.\n\nReturns\n-------\ndot_data : string\n String representation of the input tree in GraphViz dot format.\n Only returned if ``out_file`` is None.\n\n .. versionadded:: 0.18\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn import tree\n\n>>> clf = tree.DecisionTreeClassifier()\n>>> iris = load_iris()\n\n>>> clf = clf.fit(iris.data, iris.target)\n>>> tree.export_graphviz(clf)\n'digraph Tree {..." - }, - { - "name": "_compute_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the depth of the subtree rooted in node." - }, - { - "name": "export_text", - "decorators": [], - "parameters": [ - { - "name": "decision_tree", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The decision tree estimator to be exported. It can be an instance of DecisionTreeClassifier or DecisionTreeRegressor." - }, - { - "name": "feature_names", - "type": "List[str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A list of length n_features containing the feature names. If None generic names will be used (\"feature_0\", \"feature_1\", ...)." - }, - { - "name": "max_depth", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Only the first max_depth levels of the tree are exported. Truncated branches will be marked with \"...\"." - }, - { - "name": "spacing", - "type": "int", - "hasDefault": true, - "default": "3", - "limitation": null, - "ignored": false, - "docstring": "Number of spaces between edges. The higher it is, the wider the result." - }, - { - "name": "decimals", - "type": "int", - "hasDefault": true, - "default": "2", - "limitation": null, - "ignored": false, - "docstring": "Number of decimal digits to display." 
- }, - { - "name": "show_weights", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If true the classification weights will be exported on each leaf. The classification weights are the number of samples each class." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Build a text report showing the rules of a decision tree.\n\nNote that backwards compatibility may not be supported.\n\nParameters\n----------\ndecision_tree : object\n The decision tree estimator to be exported.\n It can be an instance of\n DecisionTreeClassifier or DecisionTreeRegressor.\n\nfeature_names : list of str, default=None\n A list of length n_features containing the feature names.\n If None generic names will be used (\"feature_0\", \"feature_1\", ...).\n\nmax_depth : int, default=10\n Only the first max_depth levels of the tree are exported.\n Truncated branches will be marked with \"...\".\n\nspacing : int, default=3\n Number of spaces between edges. 
The higher it is, the wider the result.\n\ndecimals : int, default=2\n Number of decimal digits to display.\n\nshow_weights : bool, default=False\n If true the classification weights will be exported on each leaf.\n The classification weights are the number of samples each class.\n\nReturns\n-------\nreport : string\n Text summary of all the rules in the decision tree.\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.tree import DecisionTreeClassifier\n>>> from sklearn.tree import export_text\n>>> iris = load_iris()\n>>> X = iris['data']\n>>> y = iris['target']\n>>> decision_tree = DecisionTreeClassifier(random_state=0, max_depth=2)\n>>> decision_tree = decision_tree.fit(X, y)\n>>> r = export_text(decision_tree, feature_names=iris['feature_names'])\n>>> print(r)\n|--- petal width (cm) <= 0.80\n| |--- class: 0\n|--- petal width (cm) > 0.80\n| |--- petal width (cm) <= 1.75\n| | |--- class: 1\n| |--- petal width (cm) > 1.75\n| | |--- class: 2" - } - ] - }, - { - "name": "sklearn.tree._reingold_tilford", - "imports": [ - "import numpy as np" - ], - "classes": [ - { - "name": "DrawTree", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "left", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "right", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "lbrother", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_lmost_sibling", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__str__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "max_extents", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "Tree", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "buchheim", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "third_walk", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "first_walk", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "apportion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "move_subtree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "execute_shifts", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "ancestor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "second_walk", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tree", - "imports": [ - "from _classes import BaseDecisionTree", - "from _classes import DecisionTreeClassifier", - "from _classes import DecisionTreeRegressor", - "from _classes import 
ExtraTreeClassifier", - "from _classes import ExtraTreeRegressor", - "from _export import export_graphviz", - "from _export import plot_tree", - "from _export import export_text" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.tree.tests.test_export", - "imports": [ - "from re import finditer", - "from re import search", - "from textwrap import dedent", - "from numpy.random import RandomState", - "import pytest", - "from sklearn.base import is_classifier", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.ensemble import GradientBoostingClassifier", - "from sklearn.tree import export_graphviz", - "from sklearn.tree import plot_tree", - "from sklearn.tree import export_text", - "from io import StringIO", - "from sklearn.exceptions import NotFittedError" - ], - "classes": [], - "functions": [ - { - "name": "test_graphviz_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_graphviz_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_friedman_mse_in_graphviz", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_precision", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_export_text_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_export_text", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_tree_entropy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_tree_gini", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_plot_tree_rotate_deprecation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_not_fitted_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tree.tests.test_reingold_tilford", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.tree._reingold_tilford import buchheim", - "from sklearn.tree._reingold_tilford import Tree" - ], - "classes": [], - "functions": [ - { - "name": "test_buchheim", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tree.tests.test_tree", - "imports": [ - "import copy", - "import pickle", - "from itertools import product", - "import struct", - "import pytest", - "import numpy as np", - "from numpy.testing import assert_allclose", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import coo_matrix", - "from sklearn.random_projection import _sparse_random_matrix", - "from sklearn.dummy import DummyRegressor", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import mean_squared_error", - "from sklearn.metrics import mean_poisson_deviance", - "from sklearn.model_selection import train_test_split", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import create_memmap_backed_data", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing 
import skip_if_32bit", - "from sklearn.utils.estimator_checks import check_sample_weights_invariance", - "from sklearn.utils.validation import check_random_state", - "from sklearn.exceptions import NotFittedError", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.tree import DecisionTreeRegressor", - "from sklearn.tree import ExtraTreeClassifier", - "from sklearn.tree import ExtraTreeRegressor", - "from sklearn import tree", - "from sklearn.tree._tree import TREE_LEAF", - "from sklearn.tree._tree import TREE_UNDEFINED", - "from sklearn.tree._classes import CRITERIA_CLF", - "from sklearn.tree._classes import CRITERIA_REG", - "from sklearn import datasets", - "from sklearn.utils import compute_sample_weight", - "from sklearn.tree._utils import _realloc_test" - ], - "classes": [], - "functions": [ - { - "name": "assert_tree_equal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classification_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weighted_classification_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_regression_toy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_xor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_diabetes_overfit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_diabetes_underfit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_probability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_arrayrepr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pure_set", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_numerical_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_importances", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_importances_raises", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_importances_gini_equal_mse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_samples_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test min_samples_split parameter" - }, - { - "name": "test_min_samples_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_min_weight_fraction_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test if leaves contain at least min_weight_fraction_leaf of the\ntraining set" - }, - { - "name": 
"test_min_weight_fraction_leaf_on_dense_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_weight_fraction_leaf_on_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_min_weight_fraction_leaf_with_min_samples_leaf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test the interaction between min_weight_fraction_leaf and\nmin_samples_leaf when sample_weights is not provided in fit." - }, - { - "name": "test_min_weight_fraction_leaf_with_min_samples_leaf_on_dense_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_weight_fraction_leaf_with_min_samples_leaf_on_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_impurity_split", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_impurity_decrease", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_classes_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unbalanced_iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_memory_layout", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_weight_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check class_weights resemble sample_weights behavior." - }, - { - "name": "test_class_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_class_weight_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_weight_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_leaf_nodes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_max_leaf_nodes_max_depth", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_arrays_persist", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_only_constant_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_behaviour_constant_feature_after_splits", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_with_only_one_non_constant_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_big_input", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_realloc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_huge_allocations", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse_input_reg_trees", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sparse_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sparse_criterion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_explicit_sparse_zeros", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_explicit_sparse_zeros", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_raise_error_on_1d_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_1d_input", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_min_weight_leaf_split_level", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_min_weight_leaf_split_level", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_weight_leaf_split_level", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_public_apply", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_public_apply_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_public_apply_all_trees", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_public_apply_sparse_trees", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decision_path_hardcoded", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_decision_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decision_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_no_sparse_y_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_sparse_y_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mae", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check MAE criterion produces correct results on small toy 
dataset:\n\n------------------\n| X | y | weight |\n------------------\n| 3 | 3 | 0.1 |\n| 5 | 3 | 0.3 |\n| 8 | 4 | 1.0 |\n| 3 | 6 | 0.6 |\n| 5 | 7 | 0.3 |\n------------------\n|sum wt:| 2.3 |\n------------------\n\nBecause we are dealing with sample weights, we cannot find the median by\nsimply choosing/averaging the centre value(s), instead we consider the\nmedian where 50% of the cumulative weight is found (in a y sorted data set)\n. Therefore with regards to this test data, the cumulative weight is >= 50%\nwhen y = 4. Therefore:\nMedian = 4\n\nFor all the samples, we can get the total error by summing:\nAbsolute(Median - y) * weight\n\nI.e., total error = (Absolute(4 - 3) * 0.1)\n + (Absolute(4 - 3) * 0.3)\n + (Absolute(4 - 4) * 1.0)\n + (Absolute(4 - 6) * 0.6)\n + (Absolute(4 - 7) * 0.3)\n = 2.5\n\nImpurity = Total error / total weight\n = 2.5 / 2.3\n = 1.08695652173913\n ------------------\n\nFrom this root node, the next best split is between X values of 3 and 5.\nThus, we have left and right child nodes:\n\nLEFT RIGHT\n------------------ ------------------\n| X | y | weight | | X | y | weight |\n------------------ ------------------\n| 3 | 3 | 0.1 | | 5 | 3 | 0.3 |\n| 3 | 6 | 0.6 | | 8 | 4 | 1.0 |\n------------------ | 5 | 7 | 0.3 |\n|sum wt:| 0.7 | ------------------\n------------------ |sum wt:| 1.6 |\n ------------------\n\nImpurity is found in the same way:\nLeft node Median = 6\nTotal error = (Absolute(6 - 3) * 0.1)\n + (Absolute(6 - 6) * 0.6)\n = 0.3\n\nLeft Impurity = Total error / total weight\n = 0.3 / 0.7\n = 0.428571428571429\n -------------------\n\nLikewise for Right node:\nRight node Median = 4\nTotal error = (Absolute(4 - 3) * 0.3)\n + (Absolute(4 - 4) * 1.0)\n + (Absolute(4 - 7) * 0.3)\n = 1.2\n\nRight Impurity = Total error / total weight\n = 1.2 / 1.6\n = 0.75\n ------" - }, - { - "name": "test_criterion_copy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_empty_leaf_infinite_threshold", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prune_tree_classifier_are_subtrees", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prune_tree_regression_are_subtrees", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prune_single_node_tree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_pruning_creates_subtree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "assert_is_subtree", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_prune_tree_raises_negative_ccp_alpha", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_apply_path_readonly", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_apply_path_readonly_all_trees", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_balance_property", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_poisson_zero_nodes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_poisson_vs_mse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_decision_tree_regressor_sample_weight_consistentcy", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that the impact of sample_weight is consistent." - }, - { - "name": "test_X_idx_sorted_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.tree.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.utils.class_weight", - "imports": [ - "import numpy as np", - "from validation import _deprecate_positional_args", - "from preprocessing import LabelEncoder" - ], - "classes": [], - "functions": [ - { - "name": "compute_class_weight", - "decorators": [], - "parameters": [ - { - "name": "class_weight", - "type": "Union[Dict, Literal['balanced']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If 'balanced', class weights will be given by ``n_samples / (n_classes * np.bincount(y))``. If a dictionary is given, keys are classes and values are corresponding class weights. If None is given, the class weights will be uniform." - }, - { - "name": "classes", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of the classes occurring in the data, as given by ``np.unique(y_org)`` with ``y_org`` the original class labels." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of original class labels per sample." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate class weights for unbalanced datasets.\n\nParameters\n----------\nclass_weight : dict, 'balanced' or None\n If 'balanced', class weights will be given by\n ``n_samples / (n_classes * np.bincount(y))``.\n If a dictionary is given, keys are classes and values\n are corresponding class weights.\n If None is given, the class weights will be uniform.\n\nclasses : ndarray\n Array of the classes occurring in the data, as given by\n ``np.unique(y_org)`` with ``y_org`` the original class labels.\n\ny : array-like of shape (n_samples,)\n Array of original class labels per sample.\n\nReturns\n-------\nclass_weight_vect : ndarray of shape (n_classes,)\n Array with class_weight_vect[i] the weight for i-th class.\n\nReferences\n----------\nThe \"balanced\" heuristic is inspired by\nLogistic Regression in Rare Events Data, King, Zen, 2001." - }, - { - "name": "compute_sample_weight", - "decorators": [], - "parameters": [ - { - "name": "class_weight", - "type": "Union[Dict, List, Literal[\"balanced\"]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. Note that for multioutput (including multilabel) weights should be defined for each class of every column in its own dict. For example, for four-class multilabel classification weights should be [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of [{1:1}, {2:5}, {3:1}, {4:1}]. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data: ``n_samples / (n_classes * np.bincount(y))``. For multi-output, the weights of each column of y will be multiplied." 
- }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of original class labels per sample." - }, - { - "name": "indices", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of indices to be used in a subsample. Can be of length less than n_samples in the case of a subsample, or equal to n_samples in the case of a bootstrap subsample with repeated indices. If None, the sample weight will be calculated over the full sample. Only \"balanced\" is supported for class_weight if this is provided." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Estimate sample weights by class for unbalanced datasets.\n\nParameters\n----------\nclass_weight : dict, list of dicts, \"balanced\", or None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data:\n ``n_samples / (n_classes * np.bincount(y))``.\n\n For multi-output, the weights of each column of y will be multiplied.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Array of original class labels per sample.\n\nindices : array-like of shape (n_subsample,), default=None\n Array of indices to be used in a subsample. 
Can be of length less than\n n_samples in the case of a subsample, or equal to n_samples in the\n case of a bootstrap subsample with repeated indices. If None, the\n sample weight will be calculated over the full sample. Only \"balanced\"\n is supported for class_weight if this is provided.\n\nReturns\n-------\nsample_weight_vect : ndarray of shape (n_samples,)\n Array with sample weights as applied to the original y." - } - ] - }, - { - "name": "sklearn.utils.deprecation", - "imports": [ - "import warnings", - "import functools" - ], - "classes": [ - { - "name": "deprecated", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "extra", - "type": "str", - "hasDefault": true, - "default": "''", - "limitation": null, - "ignored": false, - "docstring": "To be added to the deprecation messages." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [ - { - "name": "obj", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Call method\n\nParameters\n----------\nobj : object" - }, - { - "name": "_decorate_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_decorate_fun", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decorate function fun" - }, - { - "name": "_decorate_property", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_update_doc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Decorator to mark a function or class as deprecated.\n\nIssue a 
warning when the function is called/the class is instantiated and\nadds a warning to the docstring.\n\nThe optional extra argument will be appended to the deprecation message\nand the docstring. Note: to use this with the default value for extra, put\nin an empty of parentheses:\n\n>>> from sklearn.utils import deprecated\n>>> deprecated()\n\n\n>>> @deprecated()\n... def some_function(): pass\n\nParameters\n----------\nextra : str, default=''\n To be added to the deprecation messages." - } - ], - "functions": [ - { - "name": "_is_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper to check if func is wrapped by our deprecated decorator" - } - ] - }, - { - "name": "sklearn.utils.estimator_checks", - "imports": [ - "import types", - "import warnings", - "import pickle", - "import re", - "from copy import deepcopy", - "from functools import partial", - "from functools import wraps", - "from inspect import signature", - "import numpy as np", - "from scipy import sparse", - "from scipy.stats import rankdata", - "import joblib", - "from None import IS_PYPY", - "from None import config_context", - "from _testing import _get_args", - "from _testing import assert_raise_message", - "from _testing import assert_array_equal", - "from _testing import assert_array_almost_equal", - "from _testing import assert_allclose", - "from _testing import assert_allclose_dense_sparse", - "from _testing import set_random_state", - "from _testing import SkipTest", - "from _testing import ignore_warnings", - "from _testing import create_memmap_backed_data", - "from _testing import raises", - "from None import is_scalar_nan", - "from linear_model import LogisticRegression", - "from linear_model import Ridge", - "from base import clone", - "from base import ClusterMixin", - "from base import is_classifier", - "from base import is_regressor", - "from base import is_outlier_detector", - "from base import RegressorMixin", - 
"from base import _is_pairwise", - "from metrics import accuracy_score", - "from metrics import adjusted_rand_score", - "from metrics import f1_score", - "from random_projection import BaseRandomProjection", - "from feature_selection import SelectKBest", - "from pipeline import make_pipeline", - "from exceptions import DataConversionWarning", - "from exceptions import NotFittedError", - "from exceptions import SkipTestWarning", - "from model_selection import train_test_split", - "from model_selection import ShuffleSplit", - "from model_selection._validation import _safe_split", - "from metrics.pairwise import rbf_kernel", - "from metrics.pairwise import linear_kernel", - "from metrics.pairwise import pairwise_distances", - "from None import shuffle", - "from _tags import _DEFAULT_TAGS", - "from _tags import _safe_tags", - "from validation import has_fit_parameter", - "from validation import _num_samples", - "from preprocessing import StandardScaler", - "from preprocessing import scale", - "from datasets import load_iris", - "from datasets import make_blobs", - "from datasets import make_multilabel_classification", - "from datasets import make_regression", - "import pytest", - "import pandas as pd" - ], - "classes": [ - { - "name": "_NotAnArray", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "data", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__array__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__array_function__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "An object that is convertible to an array.\n\nParameters\n----------\ndata : array-like\n The data." - } - ], - "functions": [ - { - "name": "_yield_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_yield_classifier_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_supervised_y_no_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_yield_regressor_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_yield_transformer_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_yield_clustering_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_yield_outliers_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_yield_all_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_check_estimator_ids", - "decorators": [], - "parameters": [ - { - "name": "obj", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Items generated by 
`check_estimator`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Create pytest ids for checks.\n\nWhen `obj` is an estimator, this returns the pprint version of the\nestimator (with `print_changed_only=True`). When `obj` is a function, the\nname of the function is returned with its keyword arguments.\n\n`_get_check_estimator_ids` is designed to be used as the `id` in\n`pytest.mark.parametrize` where `check_estimator(..., generate_only=True)`\nis yielding estimators and checks.\n\nParameters\n----------\nobj : estimator or function\n Items generated by `check_estimator`.\n\nReturns\n-------\nid : str or None\n\nSee Also\n--------\ncheck_estimator" - }, - { - "name": "_construct_instance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Construct Estimator instance if possible." - }, - { - "name": "_maybe_mark_xfail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_maybe_skip", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_should_be_skipped_or_marked", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "parametrize_with_checks", - "decorators": [], - "parameters": [ - { - "name": "estimators", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimators to generated checks for. .. versionchanged:: 0.24 Passing a class was deprecated in version 0.23, and support for classes was removed in 0.24. Pass an instance instead. .. 
versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Pytest specific decorator for parametrizing estimator checks.\n\nThe `id` of each check is set to be a pprint version of the estimator\nand the name of the check with its keyword arguments.\nThis allows to use `pytest -k` to specify which tests to run::\n\n pytest test_check_estimators.py -k check_estimators_fit_returns_self\n\nParameters\n----------\nestimators : list of estimators instances\n Estimators to generated checks for.\n\n .. versionchanged:: 0.24\n Passing a class was deprecated in version 0.23, and support for\n classes was removed in 0.24. Pass an instance instead.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ndecorator : `pytest.mark.parametrize`\n\nExamples\n--------\n>>> from sklearn.utils.estimator_checks import parametrize_with_checks\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.tree import DecisionTreeRegressor\n\n>>> @parametrize_with_checks([LogisticRegression(),\n... DecisionTreeRegressor()])\n... def test_sklearn_compatible_estimator(estimator, check):\n... check(estimator)" - }, - { - "name": "check_estimator", - "decorators": [], - "parameters": [ - { - "name": "Estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator instance to check. .. versionchanged:: 0.24 Passing a class was deprecated in version 0.23, and support for classes was removed in 0.24." - }, - { - "name": "generate_only", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When `False`, checks are evaluated when `check_estimator` is called. When `True`, `check_estimator` returns a generator that yields (estimator, check) tuples. The check is run by calling `check(estimator)`. .. 
versionadded:: 0.22" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Check if estimator adheres to scikit-learn conventions.\n\nThis estimator will run an extensive test-suite for input validation,\nshapes, etc, making sure that the estimator complies with `scikit-learn`\nconventions as detailed in :ref:`rolling_your_own_estimator`.\nAdditional tests for classifiers, regressors, clustering or transformers\nwill be run if the Estimator class inherits from the corresponding mixin\nfrom sklearn.base.\n\nSetting `generate_only=True` returns a generator that yields (estimator,\ncheck) tuples where the check can be called independently from each\nother, i.e. `check(estimator)`. This allows all checks to be run\nindependently and report the checks that are failing.\n\nscikit-learn provides a pytest specific decorator,\n:func:`~sklearn.utils.parametrize_with_checks`, making it easier to test\nmultiple estimators.\n\nParameters\n----------\nEstimator : estimator object\n Estimator instance to check.\n\n .. versionchanged:: 0.24\n Passing a class was deprecated in version 0.23, and support for\n classes was removed in 0.24.\n\ngenerate_only : bool, default=False\n When `False`, checks are evaluated when `check_estimator` is called.\n When `True`, `check_estimator` returns a generator that yields\n (estimator, check) tuples. The check is run by calling\n `check(estimator)`.\n\n .. versionadded:: 0.22\n\nReturns\n-------\nchecks_generator : generator\n Generator that yields (estimator, check) tuples. Returned when\n `generate_only=True`." 
- }, - { - "name": "_regression_dataset", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_checking_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_pairwise_metric", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator object to test." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns True if estimator accepts pairwise metric.\n\nParameters\n----------\nestimator : object\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if _pairwise is set to True and False otherwise." - }, - { - "name": "_pairwise_estimator_convert_X", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_generate_sparse_matrix", - "decorators": [], - "parameters": [ - { - "name": "X_csr: CSR Matrix", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input matrix in CSR format." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Generate sparse matrices with {32,64}bit indices of diverse format.\n\nParameters\n----------\nX_csr: CSR Matrix\n Input matrix in CSR format.\n\nReturns\n-------\nout: iter(Matrices)\n In format['dok', 'lil', 'dia', 'bsr', 'csr', 'csc', 'coo',\n 'coo_64', 'csc_64', 'csr_64']" - }, - { - "name": "check_estimator_sparse_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sample_weights_pandas_series", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sample_weights_not_an_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sample_weights_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sample_weights_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sample_weights_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_dtype_object", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_complex_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_dict_unchanged", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_is_public_parameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_dont_overwrite_parameters", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_fit2d_predict1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_apply_on_subsets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_methods_subset_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_methods_sample_order_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_fit2d_1sample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_fit2d_1feature", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_fit1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_transformer_general", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_transformer_data_not_an_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_transformers_unfitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_pipeline_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"check_fit_score_takes_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_estimators_dtypes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_transformer_preserve_dtypes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_estimators_empty_data_messages", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_estimators_nan_inf", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_nonsquare_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that error is thrown when non-square data provided." - }, - { - "name": "check_estimators_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that we can pickle all estimators." 
- }, - { - "name": "check_estimators_partial_fit_n_features", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_classifier_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_regressor_multioutput", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_clustering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_clusterer_compute_labels_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that predict is invariant of compute_labels." - }, - { - "name": "check_classifiers_one_label", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_classifiers_train", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_outlier_corruption", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_outliers_train", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_classifiers_multilabel_representation_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_estimators_fit_returns_self", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if self is returned when calling fit." 
- }, - { - "name": "check_estimators_unfitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that predict raises an exception in an unfitted estimator.\n\nUnfitted estimators should raise a NotFittedError." - }, - { - "name": "check_supervised_y_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_classifiers_predictions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_choose_check_classifiers_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_classifiers_classes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_regressors_int", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_regressors_train", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_regressors_no_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_class_weight_classifiers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_class_weight_balanced_classifiers", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_class_weight_balanced_linear_classifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test class weights with non-contiguous class labels." 
- }, - { - "name": "check_estimators_overwrite_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_no_attributes_set_in_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check setting during init." - }, - { - "name": "check_sparsify_coefficients", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_classifier_data_not_an_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_regressor_data_not_an_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_estimators_data_not_an_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_parameters_default_constructible", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_enforce_estimator_tags_y", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_enforce_estimator_tags_x", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_non_transformer_estimators_n_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_transformer_n_iter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_get_params_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"check_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_classifiers_regression_target", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_decision_proba_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_outliers_fit_predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_fit_non_negative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_fit_idempotent", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_n_features_in", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_requires_y_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_n_features_in_after_fitting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_estimator_get_tags_default_keys", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.extmath", - "imports": [ - "import warnings", - "import numpy as np", - "from scipy import linalg", - "from scipy import sparse", - "from None import check_random_state", - "from _logistic_sigmoid import _log_logistic_sigmoid", - "from sparsefuncs_fast import csr_row_norms", - "from validation import check_array", - "from validation import _deprecate_positional_args" - ], - "classes": [], - 
"functions": [ - { - "name": "squared_norm", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Squared Euclidean or Frobenius norm of x.\n\nFaster than norm(x) ** 2.\n\nParameters\n----------\nx : array-like\n\nReturns\n-------\nfloat\n The Euclidean norm when x is a vector, the Frobenius norm when x\n is a matrix (2-d array)." - }, - { - "name": "row_norms", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input array." - }, - { - "name": "squared", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, return squared norms." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Row-wise (squared) Euclidean norm of X.\n\nEquivalent to np.sqrt((X * X).sum(axis=1)), but also supports sparse\nmatrices and does not create an X.shape-sized temporary.\n\nPerforms no input validation.\n\nParameters\n----------\nX : array-like\n The input array.\nsquared : bool, default=False\n If True, return squared norms.\n\nReturns\n-------\narray-like\n The row-wise (squared) Euclidean norm of X." - }, - { - "name": "fast_logdet", - "decorators": [], - "parameters": [ - { - "name": "A", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The matrix." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute log(det(A)) for A symmetric.\n\nEquivalent to : np.log(nl.det(A)) but more robust.\nIt returns -Inf if det(A) is non positive or is not defined.\n\nParameters\n----------\nA : array-like\n The matrix." 
- }, - { - "name": "density", - "decorators": [], - "parameters": [ - { - "name": "w", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sparse vector." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute density of a sparse vector.\n\nParameters\n----------\nw : array-like\n The sparse vector.\n\nReturns\n-------\nfloat\n The density of w, between 0 and 1." - }, - { - "name": "safe_sparse_dot", - "decorators": [], - "parameters": [ - { - "name": "a", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "b", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "dense_output", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When False, ``a`` and ``b`` both being sparse will yield sparse output. When True, output will always be a dense array." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Dot product that handle the sparse matrix case correctly.\n\nParameters\n----------\na : {ndarray, sparse matrix}\nb : {ndarray, sparse matrix}\ndense_output : bool, default=False\n When False, ``a`` and ``b`` both being sparse will yield sparse output.\n When True, output will always be a dense array.\n\nReturns\n-------\ndot_product : {ndarray, sparse matrix}\n Sparse if ``a`` and ``b`` are sparse and ``dense_output=False``." - }, - { - "name": "randomized_range_finder", - "decorators": [], - "parameters": [ - { - "name": "A", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data matrix." 
- }, - { - "name": "size", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Size of the return array." - }, - { - "name": "n_iter", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of power iterations used to stabilize the result." - }, - { - "name": "power_iteration_normalizer", - "type": "Literal['auto', 'QR', 'LU', 'none']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether the power iterations are normalized with step-by-step QR factorization (the slowest but most accurate), 'none' (the fastest but numerically unstable when `n_iter` is large, e.g. typically 5 or larger), or 'LU' factorization (numerically stable but can lose slightly in accuracy). The 'auto' mode applies no normalization if `n_iter` <= 2 and switches to LU otherwise. .. versionadded:: 0.18" - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator to use when shuffling the data, i.e. getting the random vectors to initialize the algorithm. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes an orthonormal matrix whose range approximates the range of A.\n\nParameters\n----------\nA : 2D array\n The input data matrix.\n\nsize : int\n Size of the return array.\n\nn_iter : int\n Number of power iterations used to stabilize the result.\n\npower_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n Whether the power iterations are normalized with step-by-step\n QR factorization (the slowest but most accurate), 'none'\n (the fastest but numerically unstable when `n_iter` is large, e.g.\n typically 5 or larger), or 'LU' factorization (numerically stable\n but can lose slightly in accuracy). The 'auto' mode applies no\n normalization if `n_iter` <= 2 and switches to LU otherwise.\n\n .. versionadded:: 0.18\n\nrandom_state : int, RandomState instance or None, default=None\n The seed of the pseudo random number generator to use when shuffling\n the data, i.e. getting the random vectors to initialize the algorithm.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nQ : ndarray\n A (size x size) projection matrix, the range of which\n approximates well the range of the input matrix A.\n\nNotes\n-----\n\nFollows Algorithm 4.3 of\nFinding structure with randomness: Stochastic algorithms for constructing\napproximate matrix decompositions\nHalko, et al., 2009 (arXiv:909) https://arxiv.org/pdf/0909.4061.pdf\n\nAn implementation of a randomized algorithm for principal component\nanalysis\nA. Szlam et al. 2014" - }, - { - "name": "randomized_svd", - "decorators": [], - "parameters": [ - { - "name": "M", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix to decompose." 
- }, - { - "name": "n_components", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of singular values and vectors to extract." - }, - { - "name": "n_oversamples", - "type": "int", - "hasDefault": true, - "default": "10", - "limitation": null, - "ignored": false, - "docstring": "Additional number of random vectors to sample the range of M so as to ensure proper conditioning. The total number of random vectors used to find the range of M is n_components + n_oversamples. Smaller number can improve speed but can negatively impact the quality of approximation of singular vectors and singular values." - }, - { - "name": "n_iter", - "type": "Union[Literal['auto'], int]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Number of power iterations. It can be used to deal with very noisy problems. When 'auto', it is set to 4, unless `n_components` is small (< .1 * min(X.shape)) `n_iter` in which case is set to 7. This improves precision with few components. .. versionchanged:: 0.18" - }, - { - "name": "power_iteration_normalizer", - "type": "Literal['auto', 'QR', 'LU', 'none']", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether the power iterations are normalized with step-by-step QR factorization (the slowest but most accurate), 'none' (the fastest but numerically unstable when `n_iter` is large, e.g. typically 5 or larger), or 'LU' factorization (numerically stable but can lose slightly in accuracy). The 'auto' mode applies no normalization if `n_iter` <= 2 and switches to LU otherwise. .. versionadded:: 0.18" - }, - { - "name": "transpose", - "type": "Union[Literal['auto'], bool]", - "hasDefault": true, - "default": "'auto'", - "limitation": null, - "ignored": false, - "docstring": "Whether the algorithm should be applied to M.T instead of M. 
The result should approximately be the same. The 'auto' mode will trigger the transposition if M.shape[1] > M.shape[0] since this implementation of randomized SVD tend to be a little faster in that case. .. versionchanged:: 0.18" - }, - { - "name": "flip_sign", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "The output of a singular value decomposition is only unique up to a permutation of the signs of the singular vectors. If `flip_sign` is set to `True`, the sign ambiguity is resolved by making the largest loadings for each component in the left singular vectors positive." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator to use when shuffling the data, i.e. getting the random vectors to initialize the algorithm. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes a truncated randomized SVD.\n\nParameters\n----------\nM : {ndarray, sparse matrix}\n Matrix to decompose.\n\nn_components : int\n Number of singular values and vectors to extract.\n\nn_oversamples : int, default=10\n Additional number of random vectors to sample the range of M so as\n to ensure proper conditioning. The total number of random vectors\n used to find the range of M is n_components + n_oversamples. Smaller\n number can improve speed but can negatively impact the quality of\n approximation of singular vectors and singular values.\n\nn_iter : int or 'auto', default='auto'\n Number of power iterations. It can be used to deal with very noisy\n problems. When 'auto', it is set to 4, unless `n_components` is small\n (< .1 * min(X.shape)) `n_iter` in which case is set to 7.\n This improves precision with few components.\n\n .. 
versionchanged:: 0.18\n\npower_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n Whether the power iterations are normalized with step-by-step\n QR factorization (the slowest but most accurate), 'none'\n (the fastest but numerically unstable when `n_iter` is large, e.g.\n typically 5 or larger), or 'LU' factorization (numerically stable\n but can lose slightly in accuracy). The 'auto' mode applies no\n normalization if `n_iter` <= 2 and switches to LU otherwise.\n\n .. versionadded:: 0.18\n\ntranspose : bool or 'auto', default='auto'\n Whether the algorithm should be applied to M.T instead of M. The\n result should approximately be the same. The 'auto' mode will\n trigger the transposition if M.shape[1] > M.shape[0] since this\n implementation of randomized SVD tend to be a little faster in that\n case.\n\n .. versionchanged:: 0.18\n\nflip_sign : bool, default=True\n The output of a singular value decomposition is only unique up to a\n permutation of the signs of the singular vectors. If `flip_sign` is\n set to `True`, the sign ambiguity is resolved by making the largest\n loadings for each component in the left singular vectors positive.\n\nrandom_state : int, RandomState instance or None, default=0\n The seed of the pseudo random number generator to use when shuffling\n the data, i.e. getting the random vectors to initialize the algorithm.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nNotes\n-----\nThis algorithm finds a (usually very good) approximate truncated\nsingular value decomposition using randomization to speed up the\ncomputations. It is particularly fast on large matrices on which\nyou wish to extract only a small number of components. 
In order to\nobtain further speed up, `n_iter` can be set <=2 (at the cost of\nloss of precision).\n\nReferences\n----------\n* Finding structure with randomness: Stochastic algorithms for constructing\n approximate matrix decompositions\n Halko, et al., 2009 https://arxiv.org/abs/0909.4061\n\n* A randomized algorithm for the decomposition of matrices\n Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert\n\n* An implementation of a randomized algorithm for principal component\n analysis\n A. Szlam et al. 2014" - }, - { - "name": "weighted_mode", - "decorators": [], - "parameters": [ - { - "name": "a", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "n-dimensional array of which to find mode(s)." - }, - { - "name": "w", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "n-dimensional array of weights for each value." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Axis along which to operate. Default is 0, i.e. the first axis." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns an array of the weighted modal (most common) value in a.\n\nIf there is more than one such value, only the first is returned.\nThe bin-count for the modal bins is also returned.\n\nThis is an extension of the algorithm in scipy.stats.mode.\n\nParameters\n----------\na : array-like\n n-dimensional array of which to find mode(s).\nw : array-like\n n-dimensional array of weights for each value.\naxis : int, default=0\n Axis along which to operate. Default is 0, i.e. 
the first axis.\n\nReturns\n-------\nvals : ndarray\n Array of modal values.\nscore : ndarray\n Array of weighted counts for each mode.\n\nExamples\n--------\n>>> from sklearn.utils.extmath import weighted_mode\n>>> x = [4, 1, 4, 2, 4, 2]\n>>> weights = [1, 1, 1, 1, 1, 1]\n>>> weighted_mode(x, weights)\n(array([4.]), array([3.]))\n\nThe value 4 appears three times: with uniform weights, the result is\nsimply the mode of the distribution.\n\n>>> weights = [1, 3, 0.5, 1.5, 1, 2] # deweight the 4's\n>>> weighted_mode(x, weights)\n(array([2.]), array([3.5]))\n\nThe value 2 has the highest score: it appears twice with weights of\n1.5 and 2: the sum of these is 3.5.\n\nSee Also\n--------\nscipy.stats.mode" - }, - { - "name": "cartesian", - "decorators": [], - "parameters": [ - { - "name": "arrays", - "type": "List[ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "1-D arrays to form the cartesian product of." - }, - { - "name": "out", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array to place the cartesian product in." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate a cartesian product of input arrays.\n\nParameters\n----------\narrays : list of array-like\n 1-D arrays to form the cartesian product of.\nout : ndarray, default=None\n Array to place the cartesian product in.\n\nReturns\n-------\nout : ndarray\n 2-D array of shape (M, len(arrays)) containing cartesian products\n formed of input arrays.\n\nExamples\n--------\n>>> cartesian(([1, 2, 3], [4, 5], [6, 7]))\narray([[1, 4, 6],\n [1, 4, 7],\n [1, 5, 6],\n [1, 5, 7],\n [2, 4, 6],\n [2, 4, 7],\n [2, 5, 6],\n [2, 5, 7],\n [3, 4, 6],\n [3, 4, 7],\n [3, 5, 6],\n [3, 5, 7]])\n\nNotes\n-----\nThis function may not be used on more than 32 arrays\nbecause the underlying numpy functions do not support it." 
- }, - { - "name": "svd_flip", - "decorators": [], - "parameters": [ - { - "name": "u", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "u and v are the output of `linalg.svd` or :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner dimensions so one can compute `np.dot(u * s, v)`." - }, - { - "name": "v", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "u and v are the output of `linalg.svd` or :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner dimensions so one can compute `np.dot(u * s, v)`. The input v should really be called vt to be consistent with scipy's ouput." - }, - { - "name": "u_based_decision", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, use the columns of u as the basis for sign flipping. Otherwise, use the rows of v. The choice of which variable to base the decision on is generally algorithm dependent." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Sign correction to ensure deterministic output from SVD.\n\nAdjusts the columns of u and the rows of v such that the loadings in the\ncolumns in u that are largest in absolute value are always positive.\n\nParameters\n----------\nu : ndarray\n u and v are the output of `linalg.svd` or\n :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\n dimensions so one can compute `np.dot(u * s, v)`.\n\nv : ndarray\n u and v are the output of `linalg.svd` or\n :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\n dimensions so one can compute `np.dot(u * s, v)`.\n The input v should really be called vt to be consistent with scipy's\n ouput.\n\nu_based_decision : bool, default=True\n If True, use the columns of u as the basis for sign flipping.\n Otherwise, use the rows of v. 
The choice of which variable to base the\n decision on is generally algorithm dependent.\n\n\nReturns\n-------\nu_adjusted, v_adjusted : arrays with the same dimensions as the input." - }, - { - "name": "log_logistic", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to the logistic function." - }, - { - "name": "out", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Preallocated output array." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the log of the logistic function, ``log(1 / (1 + e ** -x))``.\n\nThis implementation is numerically stable because it splits positive and\nnegative values::\n\n -log(1 + exp(-x_i)) if x_i > 0\n x_i - log(1 + exp(x_i)) if x_i <= 0\n\nFor the ordinary logistic function, use ``scipy.special.expit``.\n\nParameters\n----------\nX : array-like of shape (M, N) or (M,)\n Argument to the logistic function.\n\nout : array-like of shape (M, N) or (M,), default=None\n Preallocated output array.\n\nReturns\n-------\nout : ndarray of shape (M, N) or (M,)\n Log of the logistic function evaluated at every point in x.\n\nNotes\n-----\nSee the blog post describing this implementation:\nhttp://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression/" - }, - { - "name": "softmax", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Argument to the logistic function." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Copy X or not." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate the softmax function.\n\nThe softmax function is calculated by\nnp.exp(X) / np.sum(np.exp(X), axis=1)\n\nThis will cause overflow when large values are exponentiated.\nHence the largest value in each row is subtracted from each data\npoint to prevent this.\n\nParameters\n----------\nX : array-like of float of shape (M, N)\n Argument to the logistic function.\n\ncopy : bool, default=True\n Copy X or not.\n\nReturns\n-------\nout : ndarray of shape (M, N)\n Softmax function evaluated at every point in x." - }, - { - "name": "make_nonnegative", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The matrix to make non-negative." - }, - { - "name": "min_value", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The threshold value." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ensure `X.min()` >= `min_value`.\n\nParameters\n----------\nX : array-like\n The matrix to make non-negative.\nmin_value : float, default=0\n The threshold value.\n\nReturns\n-------\narray-like\n The thresholded array.\n\nRaises\n------\nValueError\n When X is sparse." - }, - { - "name": "_safe_accumulator_op", - "decorators": [], - "parameters": [ - { - "name": "op", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A numpy accumulator function such as np.mean or np.sum." - }, - { - "name": "x", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A numpy array to apply the accumulator function." 
- }, - { - "name": "*args", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Positional arguments passed to the accumulator function after the input x." - }, - { - "name": "**kwargs", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Keyword arguments passed to the accumulator function." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "This function provides numpy accumulator functions with a float64 dtype\nwhen used on a floating point input. This prevents accumulator overflow on\nsmaller floating point dtypes.\n\nParameters\n----------\nop : function\n A numpy accumulator function such as np.mean or np.sum.\nx : ndarray\n A numpy array to apply the accumulator function.\n*args : positional arguments\n Positional arguments passed to the accumulator function after the\n input x.\n**kwargs : keyword arguments\n Keyword arguments passed to the accumulator function.\n\nReturns\n-------\nresult\n The output of the accumulator function passed to this function." - }, - { - "name": "_incremental_weighted_mean_and_var", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to use for mean and variance update." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights. If None, then samples are equally weighted." - }, - { - "name": "last_mean", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Mean before the incremental update." 
- }, - { - "name": "last_variance", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Variance before the incremental update. If None, variance update is not computed (in case scaling is not required)." - }, - { - "name": "last_weight_sum", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sum of weights before the incremental update." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate weighted mean and weighted variance incremental update.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to use for mean and variance update.\n\nsample_weight : array-like of shape (n_samples,) or None\n Sample weights. If None, then samples are equally weighted.\n\nlast_mean : array-like of shape (n_features,)\n Mean before the incremental update.\n\nlast_variance : array-like of shape (n_features,) or None\n Variance before the incremental update.\n If None, variance update is not computed (in case scaling is not\n required).\n\nlast_weight_sum : array-like of shape (n_features,)\n Sum of weights before the incremental update.\n\nReturns\n-------\nupdated_mean : array of shape (n_features,)\n\nupdated_variance : array of shape (n_features,) or None\n If None, only mean is computed.\n\nupdated_weight_sum : array of shape (n_features,)\n\nNotes\n-----\nNaNs in `X` are ignored.\n\n`last_mean` and `last_variance` are statistics computed at the last step\nby the function. Both must be initialized to 0.0.\nThe mean is always required (`last_mean`) and returned (`updated_mean`),\nwhereas the variance can be None (`last_variance` and `updated_variance`).\n\nFor further details on the algorithm to perform the computation in a\nnumerically stable way, see [Finch2009]_, Sections 4 and 5.\n\nReferences\n----------\n.. 
[Finch2009] `Tony Finch,\n \"Incremental calculation of weighted mean and variance\",\n University of Cambridge Computing Service, February 2009.\n `_" - }, - { - "name": "_incremental_mean_and_var", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to use for variance update." - }, - { - "name": "last_mean", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "last_variance", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "last_sample_count", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculate mean update and a Youngs and Cramer variance update.\n\nlast_mean and last_variance are statistics computed at the last step by the\nfunction. Both must be initialized to 0.0. In case no scaling is required\nlast_variance can be None. The mean is always required and returned because\nnecessary for the calculation of the variance. 
last_n_samples_seen is the\nnumber of samples encountered until now.\n\nFrom the paper \"Algorithms for computing the sample variance: analysis and\nrecommendations\", by Chan, Golub, and LeVeque.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to use for variance update.\n\nlast_mean : array-like of shape (n_features,)\n\nlast_variance : array-like of shape (n_features,)\n\nlast_sample_count : array-like of shape (n_features,)\n\nReturns\n-------\nupdated_mean : ndarray of shape (n_features,)\n\nupdated_variance : ndarray of shape (n_features,)\n If None, only mean is computed.\n\nupdated_sample_count : ndarray of shape (n_features,)\n\nNotes\n-----\nNaNs are ignored during the algorithm.\n\nReferences\n----------\nT. Chan, G. Golub, R. LeVeque. Algorithms for computing the sample\n variance: recommendations, The American Statistician, Vol. 37, No. 3,\n pp. 242-247\n\nAlso, see the sparse implementation of this in\n`utils.sparsefuncs.incr_mean_variance_axis` and\n`utils.sparsefuncs_fast.incr_mean_variance_axis0`" - }, - { - "name": "_deterministic_vector_sign_flip", - "decorators": [], - "parameters": [ - { - "name": "u", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array with vectors as its rows." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Modify the sign of vectors for reproducibility.\n\nFlips the sign of elements of all the vectors (rows of u) such that\nthe absolute maximum element of each vector is positive.\n\nParameters\n----------\nu : ndarray\n Array with vectors as its rows.\n\nReturns\n-------\nu_flipped : ndarray with same shape as u\n Array with the sign flipped vectors as its rows." 
- }, - { - "name": "stable_cumsum", - "decorators": [], - "parameters": [ - { - "name": "arr", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "To be cumulatively summed as flat." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axis along which the cumulative sum is computed. The default (None) is to compute the cumsum over the flattened array." - }, - { - "name": "rtol", - "type": "float", - "hasDefault": true, - "default": "1e-05", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance, see ``np.allclose``." - }, - { - "name": "atol", - "type": "float", - "hasDefault": true, - "default": "1e-08", - "limitation": null, - "ignored": false, - "docstring": "Absolute tolerance, see ``np.allclose``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Use high precision for cumsum and check that final value matches sum.\n\nParameters\n----------\narr : array-like\n To be cumulatively summed as flat.\naxis : int, default=None\n Axis along which the cumulative sum is computed.\n The default (None) is to compute the cumsum over the flattened array.\nrtol : float, default=1e-05\n Relative tolerance, see ``np.allclose``.\natol : float, default=1e-08\n Absolute tolerance, see ``np.allclose``." 
- } - ] - }, - { - "name": "sklearn.utils.fixes", - "imports": [ - "from functools import update_wrapper", - "from distutils.version import LooseVersion", - "import functools", - "import numpy as np", - "import scipy.sparse as sp", - "import scipy", - "import scipy.stats", - "from scipy.sparse.linalg import lsqr as sparse_lsqr", - "from numpy.ma import MaskedArray as _MaskedArray", - "from _config import config_context", - "from _config import get_config", - "from deprecation import deprecated", - "from pkg_resources import parse_version", - "from scipy.sparse.linalg import lobpcg", - "from externals._lobpcg import lobpcg", - "import joblib" - ], - "classes": [ - { - "name": "loguniform", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "low", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The minimum value" - }, - { - "name": "high", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum value" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "A class supporting log-uniform random variables.\n\nParameters\n----------\nlow : float\n The minimum value\nhigh : float\n The maximum value\n\nMethods\n-------\nrvs(self, size=None, random_state=None)\n Generate log-uniform random variables\n\nThe most useful method for Scikit-learn usage is highlighted here.\nFor a full list, see\n`scipy.stats.reciprocal\n`_.\nThis list includes all functions of ``scipy.stats`` continuous\ndistributions such as ``pdf``.\n\nNotes\n-----\nThis class generates values between ``low`` and ``high`` or\n\n low <= loguniform(low, high).rvs() <= high\n\nThe logarithmic probability density function (PDF) is uniform. 
When\n``x`` is a uniformly distributed random variable between 0 and 1, ``10**x``\nare random variables that are equally likely to be returned.\n\nThis class is an alias to ``scipy.stats.reciprocal``, which uses the\nreciprocal distribution:\nhttps://en.wikipedia.org/wiki/Reciprocal_distribution\n\nExamples\n--------\n\n>>> from sklearn.utils.fixes import loguniform\n>>> rv = loguniform(1e-3, 1e1)\n>>> rvs = rv.rvs(random_state=42, size=1000)\n>>> rvs.min() # doctest: +SKIP\n0.0010435856341129003\n>>> rvs.max() # doctest: +SKIP\n9.97403052786026" - }, - { - "name": "MaskedArray", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - }, - { - "name": "_FuncWrapper", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "\"Load the global configuration before calling the function." 
- } - ], - "functions": [ - { - "name": "_object_dtype_isnan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_astype_copy_false", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns the copy=False parameter for\n{ndarray, csr_matrix, csc_matrix}.astype when possible,\notherwise don't specify" - }, - { - "name": "_joblib_parallel_args", - "decorators": [], - "parameters": [ - { - "name": "prefer", - "type": "Literal['processes', 'threads']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Soft hint to choose the default backend if no specific backend was selected with the parallel_backend context manager." - }, - { - "name": "require", - "type": "Optional[Literal['sharedmem']]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Hard condstraint to select the backend. If set to 'sharedmem', the selected backend will be single-host and thread-based even if the user asked for a non-thread based backend with parallel_backend." - }, - { - "name": "See joblib.Parallel documentation for more details", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set joblib.Parallel arguments in a compatible way for 0.11 and 0.12+\n\nFor joblib 0.11 this maps both ``prefer`` and ``require`` parameters to\na specific ``backend``.\n\nParameters\n----------\n\nprefer : str in {'processes', 'threads'} or None\n Soft hint to choose the default backend if no specific backend\n was selected with the parallel_backend context manager.\n\nrequire : 'sharedmem' or None\n Hard condstraint to select the backend. 
If set to 'sharedmem',\n the selected backend will be single-host and thread-based even\n if the user asked for a non-thread based backend with\n parallel_backend.\n\nSee joblib.Parallel documentation for more details" - }, - { - "name": "_take_along_axis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Implements a simplified version of np.take_along_axis if numpy\nversion < 1.15" - }, - { - "name": "delayed", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decorator used to capture the arguments of a function." - } - ] - }, - { - "name": "sklearn.utils.graph", - "imports": [ - "from scipy import sparse", - "from graph_shortest_path import graph_shortest_path", - "from validation import _deprecate_positional_args" - ], - "classes": [], - "functions": [ - { - "name": "single_source_shortest_path_length", - "decorators": [], - "parameters": [ - { - "name": "graph", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Adjacency matrix of the graph. Sparse matrix of format LIL is preferred." - }, - { - "name": "source", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Starting node for path." - }, - { - "name": "cutoff", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Depth to stop the search - only paths of length <= cutoff are returned." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return the shortest path length from source to all reachable nodes.\n\nReturns a dictionary of shortest path lengths keyed by target.\n\nParameters\n----------\ngraph : {sparse matrix, ndarray} of shape (n, n)\n Adjacency matrix of the graph. 
Sparse matrix of format LIL is\n preferred.\n\nsource : int\n Starting node for path.\n\ncutoff : int, default=None\n Depth to stop the search - only paths of length <= cutoff are returned.\n\nExamples\n--------\n>>> from sklearn.utils.graph import single_source_shortest_path_length\n>>> import numpy as np\n>>> graph = np.array([[ 0, 1, 0, 0],\n... [ 1, 0, 1, 0],\n... [ 0, 1, 0, 1],\n... [ 0, 0, 1, 0]])\n>>> list(sorted(single_source_shortest_path_length(graph, 0).items()))\n[(0, 0), (1, 1), (2, 2), (3, 3)]\n>>> graph = np.ones((6, 6))\n>>> list(sorted(single_source_shortest_path_length(graph, 2).items()))\n[(0, 1), (1, 1), (2, 0), (3, 1), (4, 1), (5, 1)]" - } - ] - }, - { - "name": "sklearn.utils.metaestimators", - "imports": [ - "from typing import List", - "from typing import Any", - "from abc import ABCMeta", - "from abc import abstractmethod", - "from operator import attrgetter", - "from functools import update_wrapper", - "import numpy as np", - "from utils import _safe_indexing", - "from base import BaseEstimator", - "from base import _is_pairwise" - ], - "classes": [ - { - "name": "_BaseComposition", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_replace_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_validate_names", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Handles parameter management for classifiers composed of named 
estimators.\n " - }, - { - "name": "_IffHasAttrDescriptor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__get__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Implements a conditional property using the descriptor protocol.\n\nUsing this class to create a decorator will raise an ``AttributeError``\nif none of the delegates (specified in ``delegate_names``) is an attribute\nof the base object or the first found delegate does not have an attribute\n``attribute_name``.\n\nThis allows ducktyping of the decorated method based on\n``delegate.attribute_name``. Here ``delegate`` is the first item in\n``delegate_names`` for which ``hasattr(object, delegate) is True``.\n\nSee https://docs.python.org/3/howto/descriptor.html for an explanation of\ndescriptors." - } - ], - "functions": [ - { - "name": "if_delegate_has_method", - "decorators": [], - "parameters": [ - { - "name": "delegate", - "type": "Union[List, str, Tuple[]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Name of the sub-estimator that can be accessed as an attribute of the base object. If a list or a tuple of names are provided, the first sub-estimator that is an attribute of the base object will be used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Create a decorator for methods that are delegated to a sub-estimator\n\nThis enables ducktyping by hasattr returning True according to the\nsub-estimator.\n\nParameters\n----------\ndelegate : string, list of strings or tuple of strings\n Name of the sub-estimator that can be accessed as an attribute of the\n base object. 
If a list or a tuple of names are provided, the first\n sub-estimator that is an attribute of the base object will be used." - }, - { - "name": "_safe_split", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Estimator to determine whether we should slice only rows or rows and columns." - }, - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data to be indexed. If ``estimator._pairwise is True``, this needs to be a square array-like or sparse matrix." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Targets to be indexed." - }, - { - "name": "indices", - "type": "Array[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Rows to select from X and y. If ``estimator._pairwise is True`` and ``train_indices is None`` then ``indices`` will also be used to slice columns." - }, - { - "name": "train_indices", - "type": "Array[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If ``estimator._pairwise is True`` and ``train_indices is not None``, then ``train_indices`` will be use to slice the columns of X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Create subset of dataset and properly handle kernels.\n\nSlice X, y according to indices for cross-validation, but take care of\nprecomputed kernel-matrices or pairwise affinities / distances.\n\nIf ``estimator._pairwise is True``, X needs to be square and\nwe slice rows and columns. If ``train_indices`` is not None,\nwe slice rows using ``indices`` (assumed the test set) and columns\nusing ``train_indices``, indicating the training set.\n\n.. 
deprecated:: 0.24\n\n The _pairwise attribute is deprecated in 0.24. From 1.1\n (renaming of 0.26) and onward, this function will check for the\n pairwise estimator tag.\n\nLabels y will always be indexed only along the first axis.\n\nParameters\n----------\nestimator : object\n Estimator to determine whether we should slice only rows or rows and\n columns.\n\nX : array-like, sparse matrix or iterable\n Data to be indexed. If ``estimator._pairwise is True``,\n this needs to be a square array-like or sparse matrix.\n\ny : array-like, sparse matrix or iterable\n Targets to be indexed.\n\nindices : array of int\n Rows to select from X and y.\n If ``estimator._pairwise is True`` and ``train_indices is None``\n then ``indices`` will also be used to slice columns.\n\ntrain_indices : array of int or None, default=None\n If ``estimator._pairwise is True`` and ``train_indices is not None``,\n then ``train_indices`` will be use to slice the columns of X.\n\nReturns\n-------\nX_subset : array-like, sparse matrix or list\n Indexed data.\n\ny_subset : array-like, sparse matrix or list\n Indexed targets." 
- } - ] - }, - { - "name": "sklearn.utils.multiclass", - "imports": [ - "from collections.abc import Sequence", - "from itertools import chain", - "import warnings", - "from scipy.sparse import issparse", - "from scipy.sparse.base import spmatrix", - "from scipy.sparse import dok_matrix", - "from scipy.sparse import lil_matrix", - "import numpy as np", - "from validation import check_array", - "from validation import _assert_all_finite" - ], - "classes": [], - "functions": [ - { - "name": "_unique_multiclass", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_unique_indicator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "unique_labels", - "decorators": [], - "parameters": [ - { - "name": "*ys", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Extract an ordered array of unique labels.\n\nWe don't allow:\n - mix of multilabel and multiclass (single label) targets\n - mix of label indicator matrix and anything else,\n because there are no explicit labels)\n - mix of label indicator matrices of different sizes\n - mix of string and integer labels\n\nAt the moment, we also don't allow \"multiclass-multioutput\" input type.\n\nParameters\n----------\n*ys : array-likes\n\nReturns\n-------\nout : ndarray of shape (n_unique_labels,)\n An ordered array of unique labels.\n\nExamples\n--------\n>>> from sklearn.utils.multiclass import unique_labels\n>>> unique_labels([3, 5, 5, 5, 7, 7])\narray([3, 5, 7])\n>>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4])\narray([1, 2, 3, 4])\n>>> unique_labels([1, 2, 10], [5, 11])\narray([ 1, 2, 5, 10, 11])" - }, - { - "name": "_is_integral_float", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "is_multilabel", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if ``y`` is in a multilabel format.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n Target values.\n\nReturns\n-------\nout : bool\n Return ``True``, if ``y`` is in a multilabel format, else ```False``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.utils.multiclass import is_multilabel\n>>> is_multilabel([0, 1, 0, 1])\nFalse\n>>> is_multilabel([[1], [0, 2], []])\nFalse\n>>> is_multilabel(np.array([[1, 0], [0, 0]]))\nTrue\n>>> is_multilabel(np.array([[1], [0], [0]]))\nFalse\n>>> is_multilabel(np.array([[1, 0, 0]]))\nTrue" - }, - { - "name": "check_classification_targets", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ensure that target y is of a non-regression type.\n\nOnly the following target types (as defined in type_of_target) are allowed:\n 'binary', 'multiclass', 'multiclass-multioutput',\n 'multilabel-indicator', 'multilabel-sequences'\n\nParameters\n----------\ny : array-like" - }, - { - "name": "type_of_target", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Determine the type of data indicated by the target.\n\nNote that this type is the most specific type that can be inferred.\nFor example:\n\n * ``binary`` is more specific but compatible with ``multiclass``.\n * ``multiclass`` of 
integers is more specific but compatible with\n ``continuous``.\n * ``multilabel-indicator`` is more specific but compatible with\n ``multiclass-multioutput``.\n\nParameters\n----------\ny : array-like\n\nReturns\n-------\ntarget_type : str\n One of:\n\n * 'continuous': `y` is an array-like of floats that are not all\n integers, and is 1d or a column vector.\n * 'continuous-multioutput': `y` is a 2d array of floats that are\n not all integers, and both dimensions are of size > 1.\n * 'binary': `y` contains <= 2 discrete values and is 1d or a column\n vector.\n * 'multiclass': `y` contains more than two discrete values, is not a\n sequence of sequences, and is 1d or a column vector.\n * 'multiclass-multioutput': `y` is a 2d array that contains more\n than two discrete values, is not a sequence of sequences, and both\n dimensions are of size > 1.\n * 'multilabel-indicator': `y` is a label indicator matrix, an array\n of two dimensions with at least two columns, and at most 2 unique\n values.\n * 'unknown': `y` is array-like but none of the above, such as a 3d\n array, sequence of sequences, or an array of non-sequence objects.\n\nExamples\n--------\n>>> import numpy as np\n>>> type_of_target([0.1, 0.6])\n'continuous'\n>>> type_of_target([1, -1, -1, 1])\n'binary'\n>>> type_of_target(['a', 'b', 'a'])\n'binary'\n>>> type_of_target([1.0, 2.0])\n'binary'\n>>> type_of_target([1, 0, 2])\n'multiclass'\n>>> type_of_target([1.0, 0.0, 3.0])\n'multiclass'\n>>> type_of_target(['a', 'b', 'c'])\n'multiclass'\n>>> type_of_target(np.array([[1, 2], [3, 1]]))\n'multiclass-multioutput'\n>>> type_of_target([[1, 2]])\n'multilabel-indicator'\n>>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]]))\n'continuous-multioutput'\n>>> type_of_target(np.array([[0, 1], [1, 1]]))\n'multilabel-indicator'" - }, - { - "name": "_check_partial_fit_first_call", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Private helper function for factorizing 
common classes param logic.\n\nEstimators that implement the ``partial_fit`` API need to be provided with\nthe list of possible classes at the first call to partial_fit.\n\nSubsequent calls to partial_fit should check that ``classes`` is still\nconsistent with a previous value of ``clf.classes_`` when provided.\n\nThis function returns True if it detects that this was the first call to\n``partial_fit`` on ``clf``. In that case the ``classes_`` attribute is also\nset on ``clf``." - }, - { - "name": "class_distribution", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The labels for each example." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sample weights." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute class priors from multioutput-multiclass target data.\n\nParameters\n----------\ny : {array-like, sparse matrix} of size (n_samples, n_outputs)\n The labels for each example.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nclasses : list of size n_outputs of ndarray of size (n_classes,)\n List of classes for each column.\n\nn_classes : list of int of size n_outputs\n Number of classes in each column.\n\nclass_prior : list of size n_outputs of ndarray of size (n_classes,)\n Class distribution of each column." - }, - { - "name": "_ovr_decision_function", - "decorators": [], - "parameters": [ - { - "name": "predictions", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted classes for each binary classifier." 
- }, - { - "name": "confidences", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Decision functions or predicted probabilities for positive class for each binary classifier." - }, - { - "name": "n_classes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of classes. n_classifiers must be ``n_classes * (n_classes - 1 ) / 2``." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute a continuous, tie-breaking OvR decision function from OvO.\n\nIt is important to include a continuous value, not only votes,\nto make computing AUC or calibration meaningful.\n\nParameters\n----------\npredictions : array-like of shape (n_samples, n_classifiers)\n Predicted classes for each binary classifier.\n\nconfidences : array-like of shape (n_samples, n_classifiers)\n Decision functions or predicted probabilities for positive class\n for each binary classifier.\n\nn_classes : int\n Number of classes. n_classifiers must be\n ``n_classes * (n_classes - 1 ) / 2``." 
- } - ] - }, - { - "name": "sklearn.utils.optimize", - "imports": [ - "import numpy as np", - "import warnings", - "from scipy.optimize.linesearch import line_search_wolfe2", - "from scipy.optimize.linesearch import line_search_wolfe1", - "from exceptions import ConvergenceWarning" - ], - "classes": [ - { - "name": "_LineSearchError", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "_line_search_wolfe12", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Same as line_search_wolfe1, but fall back to line_search_wolfe2 if\nsuitable step length is not found, and raise an exception if a\nsuitable step length is not found.\n\nRaises\n------\n_LineSearchError\n If no suitable step size is found." - }, - { - "name": "_cg", - "decorators": [], - "parameters": [ - { - "name": "fhess_p", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Function that takes the gradient as a parameter and returns the matrix product of the Hessian and gradient." - }, - { - "name": "fgrad", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Gradient vector." - }, - { - "name": "maxiter", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of CG iterations." - }, - { - "name": "tol", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Solve iteratively the linear system 'fhess_p . 
xsupi = fgrad'\nwith a conjugate gradient descent.\n\nParameters\n----------\nfhess_p : callable\n Function that takes the gradient as a parameter and returns the\n matrix product of the Hessian and gradient.\n\nfgrad : ndarray of shape (n_features,) or (n_features + 1,)\n Gradient vector.\n\nmaxiter : int\n Number of CG iterations.\n\ntol : float\n Stopping criterion.\n\nReturns\n-------\nxsupi : ndarray of shape (n_features,) or (n_features + 1,)\n Estimated solution." - }, - { - "name": "_newton_cg", - "decorators": [], - "parameters": [ - { - "name": "grad_hess", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Should return the gradient and a callable returning the matvec product of the Hessian." - }, - { - "name": "func", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Should return the value of the function." - }, - { - "name": "grad", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Should return the function value and the gradient. This is used by the linesearch functions." - }, - { - "name": "x0", - "type": "Array[float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Initial guess." - }, - { - "name": "args", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Arguments passed to func_grad_hess, func and grad." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-4", - "limitation": null, - "ignored": false, - "docstring": "Stopping criterion. The iteration will stop when ``max{|g_i | i = 1, ..., n} <= tol`` where ``g_i`` is the i-th component of the gradient." 
- }, - { - "name": "maxiter", - "type": "int", - "hasDefault": true, - "default": "100", - "limitation": null, - "ignored": false, - "docstring": "Number of Newton iterations." - }, - { - "name": "maxinner", - "type": "int", - "hasDefault": true, - "default": "200", - "limitation": null, - "ignored": false, - "docstring": "Number of CG iterations." - }, - { - "name": "line_search", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to use a line search or not." - }, - { - "name": "warn", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to warn when didn't converge." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Minimization of scalar function of one or more variables using the\nNewton-CG algorithm.\n\nParameters\n----------\ngrad_hess : callable\n Should return the gradient and a callable returning the matvec product\n of the Hessian.\n\nfunc : callable\n Should return the value of the function.\n\ngrad : callable\n Should return the function value and the gradient. This is used\n by the linesearch functions.\n\nx0 : array of float\n Initial guess.\n\nargs : tuple, default=()\n Arguments passed to func_grad_hess, func and grad.\n\ntol : float, default=1e-4\n Stopping criterion. The iteration will stop when\n ``max{|g_i | i = 1, ..., n} <= tol``\n where ``g_i`` is the i-th component of the gradient.\n\nmaxiter : int, default=100\n Number of Newton iterations.\n\nmaxinner : int, default=200\n Number of CG iterations.\n\nline_search : bool, default=True\n Whether to use a line search or not.\n\nwarn : bool, default=True\n Whether to warn when didn't converge.\n\nReturns\n-------\nxk : ndarray of float\n Estimated minimum." 
- }, - { - "name": "_check_optimize_result", - "decorators": [], - "parameters": [ - { - "name": "solver", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Solver name. Currently only `lbfgs` is supported." - }, - { - "name": "result", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Result of the scipy.optimize.minimize function." - }, - { - "name": "max_iter", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Expected maximum number of iterations." - }, - { - "name": "extra_warning_msg", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Extra warning message." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the OptimizeResult for successful convergence\n\nParameters\n----------\nsolver : str\n Solver name. Currently only `lbfgs` is supported.\n\nresult : OptimizeResult\n Result of the scipy.optimize.minimize function.\n\nmax_iter : int, default=None\n Expected maximum number of iterations.\n\nextra_warning_msg : str, default=None\n Extra warning message.\n\nReturns\n-------\nn_iter : int\n Number of iterations." - } - ] - }, - { - "name": "sklearn.utils.random", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "import array", - "from None import check_random_state", - "from _random import sample_without_replacement" - ], - "classes": [], - "functions": [ - { - "name": "_random_choice_csc", - "decorators": [], - "parameters": [ - { - "name": "n_samples", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to draw in each column." 
- }, - { - "name": "classes", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of classes for each column." - }, - { - "name": "class_probability", - "type": "List", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Class distribution of each column. If None, uniform distribution is assumed." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Controls the randomness of the sampled classes. See :term:`Glossary `." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Generate a sparse random matrix given column class distributions\n\nParameters\n----------\nn_samples : int,\n Number of samples to draw in each column.\n\nclasses : list of size n_outputs of arrays of size (n_classes,)\n List of classes for each column.\n\nclass_probability : list of size n_outputs of arrays of shape (n_classes,), default=None\n Class distribution of each column. 
If None, uniform distribution is\n assumed.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the sampled classes.\n See :term:`Glossary `.\n\nReturns\n-------\nrandom_matrix : sparse csc matrix of size (n_samples, n_outputs)" - } - ] - }, - { - "name": "sklearn.utils.setup", - "imports": [ - "import os", - "from os.path import join", - "from sklearn._build_utils import gen_from_templates", - "import numpy", - "from numpy.distutils.misc_util import Configuration", - "from numpy.distutils.core import setup" - ], - "classes": [], - "functions": [ - { - "name": "configuration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.sparsefuncs", - "imports": [ - "import scipy.sparse as sp", - "import numpy as np", - "from validation import _deprecate_positional_args", - "from sparsefuncs_fast import csr_mean_variance_axis0 as _csr_mean_var_axis0", - "from sparsefuncs_fast import csc_mean_variance_axis0 as _csc_mean_var_axis0", - "from sparsefuncs_fast import incr_mean_variance_axis0 as _incr_mean_var_axis0", - "from utils.validation import _check_sample_weight" - ], - "classes": [], - "functions": [ - { - "name": "_raise_typeerror", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Raises a TypeError if X is not a CSR or CSC matrix" - }, - { - "name": "_raise_error_wrong_axis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "inplace_csr_column_scale", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix to normalize using the variance of the features. It should be of CSR format." 
- }, - { - "name": "scale", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of precomputed feature-wise values to use for scaling." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inplace column scaling of a CSR matrix.\n\nScale each feature of the data matrix by multiplying with specific scale\nprovided by the caller assuming a (n_samples, n_features) shape.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix to normalize using the variance of the features.\n It should be of CSR format.\n\nscale : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Array of precomputed feature-wise values to use for scaling." - }, - { - "name": "inplace_csr_row_scale", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix to be scaled. It should be of CSR format." - }, - { - "name": "scale", - "type": "NDArray[float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of precomputed sample-wise values to use for scaling." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inplace row scaling of a CSR matrix.\n\nScale each sample of the data matrix by multiplying with specific scale\nprovided by the caller assuming a (n_samples, n_features) shape.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix to be scaled. It should be of CSR format.\n\nscale : ndarray of float of shape (n_samples,)\n Array of precomputed sample-wise values to use for scaling." - }, - { - "name": "mean_variance_axis", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. 
It can be of CSR or CSC format." - }, - { - "name": "axis", - "type": "Literal[0, 1]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axis along which the axis should be computed." - }, - { - "name": "weights", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "if axis is set to 0 shape is (n_samples,) or if axis is set to 1 shape is (n_features,). If it is set to None, then samples are equally weighted. .. versionadded:: 0.24" - }, - { - "name": "return_sum_weights", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, returns the sum of weights seen for each feature if `axis=0` or each sample if `axis=1`. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute mean and variance along an axis on a CSR or CSC matrix.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Input data. It can be of CSR or CSC format.\n\naxis : {0, 1}\n Axis along which the axis should be computed.\n\nweights : ndarray of shape (n_samples,) or (n_features,), default=None\n if axis is set to 0 shape is (n_samples,) or\n if axis is set to 1 shape is (n_features,).\n If it is set to None, then samples are equally weighted.\n\n .. versionadded:: 0.24\n\nreturn_sum_weights : bool, default=False\n If True, returns the sum of weights seen for each feature\n if `axis=0` or each sample if `axis=1`.\n\n .. versionadded:: 0.24\n\nReturns\n-------\n\nmeans : ndarray of shape (n_features,), dtype=floating\n Feature-wise means.\n\nvariances : ndarray of shape (n_features,), dtype=floating\n Feature-wise variances.\n\nsum_weights : ndarray of shape (n_features,), dtype=floating\n Returned if `return_sum_weights` is `True`." 
- }, - { - "name": "incr_mean_variance_axis", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "axis", - "type": "Literal[0, 1]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axis along which the axis should be computed." - }, - { - "name": "last_mean", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of means to update with the new data X. Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1." - }, - { - "name": "last_var", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of variances to update with the new data X. Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1." - }, - { - "name": "last_n", - "type": "Union[NDArray, float]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Sum of the weights seen so far, excluding the current weights If not float, it should be of shape (n_samples,) if axis=0 or (n_features,) if axis=1. If float it corresponds to having same weights for all samples (or features)." - }, - { - "name": "weights", - "type": "NDArray", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If axis is set to 0 shape is (n_samples,) or if axis is set to 1 shape is (n_features,). If it is set to None, then samples are equally weighted. .. versionadded:: 0.24" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute incremental mean and variance along an axis on a CSR or\nCSC matrix.\n\nlast_mean, last_var are the statistics computed at the last step by this\nfunction. 
Both must be initialized to 0-arrays of the proper size, i.e.\nthe number of features in X. last_n is the number of samples encountered\nuntil now.\n\nParameters\n----------\nX : CSR or CSC sparse matrix of shape (n_samples, n_features)\n Input data.\n\naxis : {0, 1}\n Axis along which the axis should be computed.\n\nlast_mean : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Array of means to update with the new data X.\n Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.\n\nlast_var : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Array of variances to update with the new data X.\n Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.\n\nlast_n : float or ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Sum of the weights seen so far, excluding the current weights\n If not float, it should be of shape (n_samples,) if\n axis=0 or (n_features,) if axis=1. If float it corresponds to\n having same weights for all samples (or features).\n\nweights : ndarray of shape (n_samples,) or (n_features,), default=None\n If axis is set to 0 shape is (n_samples,) or\n if axis is set to 1 shape is (n_features,).\n If it is set to None, then samples are equally weighted.\n\n .. versionadded:: 0.24\n\nReturns\n-------\nmeans : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Updated feature-wise means if axis = 0 or\n sample-wise means if axis = 1.\n\nvariances : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Updated feature-wise variances if axis = 0 or\n sample-wise variances if axis = 1.\n\nn : ndarray of shape (n_features,) or (n_samples,), dtype=integral\n Updated number of seen samples per feature if axis=0\n or number of seen features per sample if axis=1.\n\n If weights is not None, n is a sum of the weights of the seen\n samples or features instead of the actual number of seen\n samples or features.\n\nNotes\n-----\nNaNs are ignored in the algorithm." 
- }, - { - "name": "inplace_column_scale", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix to normalize using the variance of the features. It should be of CSC or CSR format." - }, - { - "name": "scale", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of precomputed feature-wise values to use for scaling." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inplace column scaling of a CSC/CSR matrix.\n\nScale each feature of the data matrix by multiplying with specific scale\nprovided by the caller assuming a (n_samples, n_features) shape.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix to normalize using the variance of the features. It should be\n of CSC or CSR format.\n\nscale : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Array of precomputed feature-wise values to use for scaling." - }, - { - "name": "inplace_row_scale", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix to be scaled. It should be of CSR or CSC format." - }, - { - "name": "scale", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of precomputed sample-wise values to use for scaling." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Inplace row scaling of a CSR or CSC matrix.\n\nScale each row of the data matrix by multiplying with specific scale\nprovided by the caller assuming a (n_samples, n_features) shape.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix to be scaled. 
It should be of CSR or CSC format.\n\nscale : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Array of precomputed sample-wise values to use for scaling." - }, - { - "name": "inplace_swap_row_csc", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix whose two rows are to be swapped. It should be of CSC format." - }, - { - "name": "m", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the row of X to be swapped." - }, - { - "name": "n", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the row of X to be swapped." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Swaps two rows of a CSC matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix whose two rows are to be swapped. It should be of\n CSC format.\n\nm : int\n Index of the row of X to be swapped.\n\nn : int\n Index of the row of X to be swapped." - }, - { - "name": "inplace_swap_row_csr", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix whose two rows are to be swapped. It should be of CSR format." - }, - { - "name": "m", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the row of X to be swapped." - }, - { - "name": "n", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the row of X to be swapped." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Swaps two rows of a CSR matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix whose two rows are to be swapped. It should be of\n CSR format.\n\nm : int\n Index of the row of X to be swapped.\n\nn : int\n Index of the row of X to be swapped." - }, - { - "name": "inplace_swap_row", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix whose two rows are to be swapped. It should be of CSR or CSC format." - }, - { - "name": "m", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the row of X to be swapped." - }, - { - "name": "n", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the row of X to be swapped." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Swaps two rows of a CSC/CSR matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix whose two rows are to be swapped. It should be of CSR or\n CSC format.\n\nm : int\n Index of the row of X to be swapped.\n\nn : int\n Index of the row of X to be swapped." - }, - { - "name": "inplace_swap_column", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Matrix whose two columns are to be swapped. It should be of CSR or CSC format." - }, - { - "name": "m", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the column of X to be swapped." 
- }, - { - "name": "n", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Index of the column of X to be swapped." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Swaps two columns of a CSC/CSR matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix whose two columns are to be swapped. It should be of\n CSR or CSC format.\n\nm : int\n Index of the column of X to be swapped.\n\nn : int\n Index of the column of X to be swapped." - }, - { - "name": "_minor_reduce", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_min_or_max_axis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sparse_min_or_max", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sparse_min_max", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sparse_nan_min_max", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "min_max_axis", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. It should be of CSR or CSC format." - }, - { - "name": "axis", - "type": "Literal[0, 1]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Axis along which the axis should be computed." - }, - { - "name": "ignore_nan", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Ignore or passing through NaN values. .. 
versionadded:: 0.20" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute minimum and maximum along an axis on a CSR or CSC matrix and\noptionally ignore NaN values.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Input data. It should be of CSR or CSC format.\n\naxis : {0, 1}\n Axis along which the axis should be computed.\n\nignore_nan : bool, default=False\n Ignore or passing through NaN values.\n\n .. versionadded:: 0.20\n\nReturns\n-------\n\nmins : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Feature-wise minima.\n\nmaxs : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Feature-wise maxima." - }, - { - "name": "count_nonzero", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. It should be of CSR format." - }, - { - "name": "axis", - "type": "Literal[0, 1]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The axis on which the data is aggregated." - }, - { - "name": "sample_weight", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weight for each row of X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "A variant of X.getnnz() with extension to weighting on axis 0\n\nUseful in efficiently calculating multilabel metrics.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_labels)\n Input data. It should be of CSR format.\n\naxis : {0, 1}, default=None\n The axis on which the data is aggregated.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weight for each row of X." 
- }, - { - "name": "_get_median", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the median of data with n_zeros additional zeros.\n\nThis function is used to support sparse matrices; it modifies data\nin-place." - }, - { - "name": "_get_elem_at_rank", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the value in data augmented with n_zeros for the given rank" - }, - { - "name": "csc_median_axis_0", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data. It should be of CSC format." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Find the median across axis 0 of a CSC matrix.\nIt is equivalent to doing np.median(X, axis=0).\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Input data. It should be of CSC format.\n\nReturns\n-------\nmedian : ndarray of shape (n_features,)\n Median." - } - ] - }, - { - "name": "sklearn.utils.stats", - "imports": [ - "import numpy as np", - "from extmath import stable_cumsum", - "from fixes import _take_along_axis" - ], - "classes": [], - "functions": [ - { - "name": "_weighted_percentile", - "decorators": [], - "parameters": [ - { - "name": "array", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values to take the weighted percentile of." - }, - { - "name": "sample_weight: 1D or 2D array", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Weights for each value in `array`. Must be same shape as `array` or of shape `(array.shape[0],)`." 
- }, - { - "name": "percentile: int", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Percentile to compute. Must be value between 0 and 100." - }, - { - "name": "default=50", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Percentile to compute. Must be value between 0 and 100." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Compute weighted percentile\n\nComputes lower weighted percentile. If `array` is a 2D array, the\n`percentile` is computed along the axis 0.\n\n .. versionchanged:: 0.24\n Accepts 2D `array`.\n\nParameters\n----------\narray : 1D or 2D array\n Values to take the weighted percentile of.\n\nsample_weight: 1D or 2D array\n Weights for each value in `array`. Must be same shape as `array` or\n of shape `(array.shape[0],)`.\n\npercentile: int, default=50\n Percentile to compute. Must be value between 0 and 100.\n\nReturns\n-------\npercentile : int if `array` 1D, ndarray if `array` 2D\n Weighted percentile." 
- } - ] - }, - { - "name": "sklearn.utils.validation", - "imports": [ - "from functools import wraps", - "import warnings", - "import numbers", - "import numpy as np", - "import scipy.sparse as sp", - "from inspect import signature", - "from inspect import isclass", - "from inspect import Parameter", - "from numpy.core.numeric import ComplexWarning", - "import joblib", - "from contextlib import suppress", - "from fixes import _object_dtype_isnan", - "from fixes import parse_version", - "from None import get_config as _get_config", - "from exceptions import PositiveSpectrumWarning", - "from exceptions import NotFittedError", - "from exceptions import DataConversionWarning", - "from extmath import _safe_accumulator_op", - "from pandas.api.types import is_sparse", - "from pandas import Int8Dtype", - "from pandas import Int16Dtype", - "from pandas import Int32Dtype", - "from pandas import Int64Dtype", - "from pandas import UInt8Dtype", - "from pandas import UInt16Dtype", - "from pandas import UInt32Dtype", - "from pandas import UInt64Dtype", - "from None import _safe_indexing" - ], - "classes": [], - "functions": [ - { - "name": "_deprecate_positional_args", - "decorators": [], - "parameters": [ - { - "name": "func", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Function to check arguments on." - }, - { - "name": "version", - "type": "Callable", - "hasDefault": true, - "default": "\"1", - "limitation": null, - "ignored": false, - "docstring": "The version when positional arguments will result in error." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decorator for methods that issues warnings for positional arguments.\n\nUsing the keyword-only argument syntax in pep 3102, arguments after the\n* will issue a warning when passed as a positional argument.\n\nParameters\n----------\nfunc : callable, default=None\n Function to check arguments on.\nversion : callable, default=\"1.0 (renaming of 0.25)\"\n The version when positional arguments will result in error." - }, - { - "name": "_assert_all_finite", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Like assert_all_finite, but only for ndarray." - }, - { - "name": "assert_all_finite", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "allow_nan", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Throw a ValueError if X contains NaN or infinity.\n\nParameters\n----------\nX : {ndarray, sparse matrix}\n\nallow_nan : bool, default=False" - }, - { - "name": "as_float_array", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, a copy of X will be created. If False, a copy may still be returned if X's dtype is not a floating point type." 
- }, - { - "name": "force_all_finite", - "type": "Union[Literal['allow-nan'], bool]", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise an error on np.inf, np.nan, pd.NA in X. The possibilities are: - True: Force all values of X to be finite. - False: accepts np.inf, np.nan, pd.NA in X. - 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot be infinite. .. versionadded:: 0.20 ``force_all_finite`` accepts the string ``'allow-nan'``. .. versionchanged:: 0.23 Accepts `pd.NA` and converts it into `np.nan`" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Converts an array-like to an array of floats.\n\nThe new dtype will be np.float32 or np.float64, depending on the original\ntype. The function can create a copy or modify the argument depending\non the argument copy.\n\nParameters\n----------\nX : {array-like, sparse matrix}\n\ncopy : bool, default=True\n If True, a copy of X will be created. If False, a copy may still be\n returned if X's dtype is not a floating point type.\n\nforce_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in X. The\n possibilities are:\n\n - True: Force all values of X to be finite.\n - False: accepts np.inf, np.nan, pd.NA in X.\n - 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot\n be infinite.\n\n .. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`\n\nReturns\n-------\nXT : {ndarray, sparse matrix}\n An array of type float." - }, - { - "name": "_is_arraylike", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns whether the input is array-like." 
- }, - { - "name": "_num_samples", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return number of samples in array-like x." - }, - { - "name": "check_memory", - "decorators": [], - "parameters": [ - { - "name": "memory", - "type": "Optional[str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Check that ``memory`` is joblib.Memory-like.\n\njoblib.Memory-like means that ``memory`` can be converted into a\njoblib.Memory instance (typically a str denoting the ``location``)\nor has the same interface (has a ``cache`` method).\n\nParameters\n----------\nmemory : None, str or object with the joblib.Memory interface\n\nReturns\n-------\nmemory : object with the joblib.Memory interface\n\nRaises\n------\nValueError\n If ``memory`` is not joblib.Memory-like." - }, - { - "name": "check_consistent_length", - "decorators": [], - "parameters": [ - { - "name": "*arrays", - "type": "Union[List, Tuple[]]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Objects that will be checked for consistent length." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that all arrays have consistent first dimensions.\n\nChecks whether all objects in arrays have the same shape or length.\n\nParameters\n----------\n*arrays : list or tuple of input objects.\n Objects that will be checked for consistent length." - }, - { - "name": "_make_indexable", - "decorators": [], - "parameters": [ - { - "name": "iterable", - "type": "Union[List, NDArray]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Object to be converted to an indexable iterable." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ensure iterable supports indexing or convert to an indexable variant.\n\nConvert sparse matrices to csr and other non-indexable iterable to arrays.\nLet `None` and indexable objects (e.g. pandas dataframes) pass unchanged.\n\nParameters\n----------\niterable : {list, dataframe, ndarray, sparse matrix} or None\n Object to be converted to an indexable iterable." - }, - { - "name": "indexable", - "decorators": [], - "parameters": [ - { - "name": "*iterables", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of objects to ensure sliceability." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make arrays indexable for cross-validation.\n\nChecks consistent length, passes through None, and ensures that everything\ncan be indexed by converting sparse matrices to csr and converting\nnon-interable objects to arrays.\n\nParameters\n----------\n*iterables : {lists, dataframes, ndarrays, sparse matrices}\n List of objects to ensure sliceability." - }, - { - "name": "_ensure_sparse_format", - "decorators": [], - "parameters": [ - { - "name": "spmatrix", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input to validate and convert." - }, - { - "name": "accept_sparse", - "type": "Union[bool, str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "String[s] representing allowed sparse matrix formats ('csc', 'csr', 'coo', 'dok', 'bsr', 'lil', 'dia'). If the input is sparse but not in the allowed format, it will be converted to the first listed format. True allows the input to be any format. False means that a sparse matrix input will raise an error." 
- }, - { - "name": "dtype", - "type": "Optional[str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data type of result. If None, the dtype of the input is preserved." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether a forced copy will be triggered. If copy=False, a copy might be triggered by a conversion." - }, - { - "name": "force_all_finite", - "type": "Union[Literal['allow-nan'], bool]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise an error on np.inf, np.nan, pd.NA in X. The possibilities are: - True: Force all values of X to be finite. - False: accepts np.inf, np.nan, pd.NA in X. - 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot be infinite. .. versionadded:: 0.20 ``force_all_finite`` accepts the string ``'allow-nan'``. .. versionchanged:: 0.23 Accepts `pd.NA` and converts it into `np.nan`" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Convert a sparse matrix to a given format.\n\nChecks the sparse format of spmatrix and converts if necessary.\n\nParameters\n----------\nspmatrix : sparse matrix\n Input to validate and convert.\n\naccept_sparse : str, bool or list/tuple of str\n String[s] representing allowed sparse matrix formats ('csc',\n 'csr', 'coo', 'dok', 'bsr', 'lil', 'dia'). If the input is sparse but\n not in the allowed format, it will be converted to the first listed\n format. True allows the input to be any format. False means\n that a sparse matrix input will raise an error.\n\ndtype : str, type or None\n Data type of result. If None, the dtype of the input is preserved.\n\ncopy : bool\n Whether a forced copy will be triggered. 
If copy=False, a copy might\n be triggered by a conversion.\n\nforce_all_finite : bool or 'allow-nan'\n Whether to raise an error on np.inf, np.nan, pd.NA in X. The\n possibilities are:\n\n - True: Force all values of X to be finite.\n - False: accepts np.inf, np.nan, pd.NA in X.\n - 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot\n be infinite.\n\n .. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`\n\nReturns\n-------\nspmatrix_converted : sparse matrix.\n Matrix that is ensured to have an allowed type." - }, - { - "name": "_ensure_no_complex_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_array", - "decorators": [], - "parameters": [ - { - "name": "array", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input object to check / convert." - }, - { - "name": "accept_sparse", - "type": "Union[bool, str]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "String[s] representing allowed sparse matrix formats, such as 'csc', 'csr', etc. If the input is sparse but not in the allowed format, it will be converted to the first listed format. True allows the input to be any format. False means that a sparse matrix input will raise an error." - }, - { - "name": "accept_large_sparse", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by accept_sparse, accept_large_sparse=False will cause it to be accepted only if its indices are stored with a 32-bit dtype. .. 
versionadded:: 0.20" - }, - { - "name": "dtype", - "type": "Union[List, Literal['numeric']]", - "hasDefault": true, - "default": "'numeric'", - "limitation": null, - "ignored": false, - "docstring": "Data type of result. If None, the dtype of the input is preserved. If \"numeric\", dtype is preserved unless array.dtype is object. If dtype is a list of types, conversion on the first type is only performed if the dtype of the input is not in the list." - }, - { - "name": "order", - "type": "Literal['F', 'C']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether an array will be forced to be fortran or c-style. When order is None (default), then if copy=False, nothing is ensured about the memory layout of the output array; otherwise (copy=True) the memory layout of the returned array is kept as close as possible to the original array." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether a forced copy will be triggered. If copy=False, a copy might be triggered by a conversion." - }, - { - "name": "force_all_finite", - "type": "Union[Literal['allow-nan'], bool]", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise an error on np.inf, np.nan, pd.NA in array. The possibilities are: - True: Force all values of array to be finite. - False: accepts np.inf, np.nan, pd.NA in array. - 'allow-nan': accepts only np.nan and pd.NA values in array. Values cannot be infinite. .. versionadded:: 0.20 ``force_all_finite`` accepts the string ``'allow-nan'``. .. versionchanged:: 0.23 Accepts `pd.NA` and converts it into `np.nan`" - }, - { - "name": "ensure_2d", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise a value error if array is not 2D." 
- }, - { - "name": "allow_nd", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to allow array.ndim > 2." - }, - { - "name": "ensure_min_samples", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Make sure that the array has a minimum number of samples in its first axis (rows for a 2D array). Setting to 0 disables this check." - }, - { - "name": "ensure_min_features", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Make sure that the 2D array has some minimum number of features (columns). The default value of 1 rejects empty datasets. This check is only enforced when the input data has effectively 2 dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0 disables this check." - }, - { - "name": "estimator", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If passed, include the name of the estimator in warning messages." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Input validation on an array, list, sparse matrix or similar.\n\nBy default, the input is checked to be a non-empty 2D array containing\nonly finite values. If the dtype of the array is object, attempt\nconverting to float, raising on failure.\n\nParameters\n----------\narray : object\n Input object to check / convert.\n\naccept_sparse : str, bool or list/tuple of str, default=False\n String[s] representing allowed sparse matrix formats, such as 'csc',\n 'csr', etc. If the input is sparse but not in the allowed format,\n it will be converted to the first listed format. True allows the input\n to be any format. 
False means that a sparse matrix input will\n raise an error.\n\naccept_large_sparse : bool, default=True\n If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by\n accept_sparse, accept_large_sparse=False will cause it to be accepted\n only if its indices are stored with a 32-bit dtype.\n\n .. versionadded:: 0.20\n\ndtype : 'numeric', type, list of type or None, default='numeric'\n Data type of result. If None, the dtype of the input is preserved.\n If \"numeric\", dtype is preserved unless array.dtype is object.\n If dtype is a list of types, conversion on the first type is only\n performed if the dtype of the input is not in the list.\n\norder : {'F', 'C'} or None, default=None\n Whether an array will be forced to be fortran or c-style.\n When order is None (default), then if copy=False, nothing is ensured\n about the memory layout of the output array; otherwise (copy=True)\n the memory layout of the returned array is kept as close as possible\n to the original array.\n\ncopy : bool, default=False\n Whether a forced copy will be triggered. If copy=False, a copy might\n be triggered by a conversion.\n\nforce_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in array. The\n possibilities are:\n\n - True: Force all values of array to be finite.\n - False: accepts np.inf, np.nan, pd.NA in array.\n - 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n cannot be infinite.\n\n .. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`\n\nensure_2d : bool, default=True\n Whether to raise a value error if array is not 2D.\n\nallow_nd : bool, default=False\n Whether to allow array.ndim > 2.\n\nensure_min_samples : int, default=1\n Make sure that the array has a minimum number of samples in its first\n axis (rows for a 2D array). 
Setting to 0 disables this check.\n\nensure_min_features : int, default=1\n Make sure that the 2D array has some minimum number of features\n (columns). The default value of 1 rejects empty datasets.\n This check is only enforced when the input data has effectively 2\n dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0\n disables this check.\n\nestimator : str or estimator instance, default=None\n If passed, include the name of the estimator in warning messages.\n\nReturns\n-------\narray_converted : object\n The converted and validated array." - }, - { - "name": "_check_large_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Raise a ValueError if X has 64bit indices and accept_large_sparse=False\n " - }, - { - "name": "check_X_y", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Union[NDArray, List]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "y", - "type": "Union[NDArray, List]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Labels." - }, - { - "name": "accept_sparse", - "type": "Union[bool, str, List[str]]", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "String[s] representing allowed sparse matrix formats, such as 'csc', 'csr', etc. If the input is sparse but not in the allowed format, it will be converted to the first listed format. True allows the input to be any format. False means that a sparse matrix input will raise an error." 
- }, - { - "name": "accept_large_sparse", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by accept_sparse, accept_large_sparse will cause it to be accepted only if its indices are stored with a 32-bit dtype. .. versionadded:: 0.20" - }, - { - "name": "dtype", - "type": "Union[List, Literal['numeric']]", - "hasDefault": true, - "default": "'numeric'", - "limitation": null, - "ignored": false, - "docstring": "Data type of result. If None, the dtype of the input is preserved. If \"numeric\", dtype is preserved unless array.dtype is object. If dtype is a list of types, conversion on the first type is only performed if the dtype of the input is not in the list." - }, - { - "name": "order", - "type": "Literal['F', 'C']", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Whether an array will be forced to be fortran or c-style." - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether a forced copy will be triggered. If copy=False, a copy might be triggered by a conversion." - }, - { - "name": "force_all_finite", - "type": "Union[Literal['allow-nan'], bool]", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise an error on np.inf, np.nan, pd.NA in X. This parameter does not influence whether y can have np.inf, np.nan, pd.NA values. The possibilities are: - True: Force all values of X to be finite. - False: accepts np.inf, np.nan, pd.NA in X. - 'allow-nan': accepts only np.nan or pd.NA values in X. Values cannot be infinite. .. versionadded:: 0.20 ``force_all_finite`` accepts the string ``'allow-nan'``. .. 
versionchanged:: 0.23 Accepts `pd.NA` and converts it into `np.nan`" - }, - { - "name": "ensure_2d", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to raise a value error if X is not 2D." - }, - { - "name": "allow_nd", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to allow X.ndim > 2." - }, - { - "name": "multi_output", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to allow 2D y (array or sparse matrix). If false, y will be validated as a vector. y cannot have np.nan or np.inf values if multi_output=True." - }, - { - "name": "ensure_min_samples", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Make sure that X has a minimum number of samples in its first axis (rows for a 2D array)." - }, - { - "name": "ensure_min_features", - "type": "int", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Make sure that the 2D array has some minimum number of features (columns). The default value of 1 rejects empty datasets. This check is only enforced when X has effectively 2 dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0 disables this check." - }, - { - "name": "y_numeric", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "Whether to ensure that y has a numeric type. If dtype of y is object, it is converted to float64. Should only be used for regression algorithms." - }, - { - "name": "estimator", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If passed, include the name of the estimator in warning messages." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Input validation for standard estimators.\n\nChecks X and y for consistent length, enforces X to be 2D and y 1D. By\ndefault, X is checked to be non-empty and containing only finite values.\nStandard input checks are also applied to y, such as checking that y\ndoes not have np.nan or np.inf targets. For multi-label y, set\nmulti_output=True to allow 2D and sparse y. If the dtype of X is\nobject, attempt converting to float, raising on failure.\n\nParameters\n----------\nX : {ndarray, list, sparse matrix}\n Input data.\n\ny : {ndarray, list, sparse matrix}\n Labels.\n\naccept_sparse : str, bool or list of str, default=False\n String[s] representing allowed sparse matrix formats, such as 'csc',\n 'csr', etc. If the input is sparse but not in the allowed format,\n it will be converted to the first listed format. True allows the input\n to be any format. False means that a sparse matrix input will\n raise an error.\n\naccept_large_sparse : bool, default=True\n If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by\n accept_sparse, accept_large_sparse will cause it to be accepted only\n if its indices are stored with a 32-bit dtype.\n\n .. versionadded:: 0.20\n\ndtype : 'numeric', type, list of type or None, default='numeric'\n Data type of result. If None, the dtype of the input is preserved.\n If \"numeric\", dtype is preserved unless array.dtype is object.\n If dtype is a list of types, conversion on the first type is only\n performed if the dtype of the input is not in the list.\n\norder : {'F', 'C'}, default=None\n Whether an array will be forced to be fortran or c-style.\n\ncopy : bool, default=False\n Whether a forced copy will be triggered. If copy=False, a copy might\n be triggered by a conversion.\n\nforce_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in X. 
This parameter\n does not influence whether y can have np.inf, np.nan, pd.NA values.\n The possibilities are:\n\n - True: Force all values of X to be finite.\n - False: accepts np.inf, np.nan, pd.NA in X.\n - 'allow-nan': accepts only np.nan or pd.NA values in X. Values cannot\n be infinite.\n\n .. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`\n\nensure_2d : bool, default=True\n Whether to raise a value error if X is not 2D.\n\nallow_nd : bool, default=False\n Whether to allow X.ndim > 2.\n\nmulti_output : bool, default=False\n Whether to allow 2D y (array or sparse matrix). If false, y will be\n validated as a vector. y cannot have np.nan or np.inf values if\n multi_output=True.\n\nensure_min_samples : int, default=1\n Make sure that X has a minimum number of samples in its first\n axis (rows for a 2D array).\n\nensure_min_features : int, default=1\n Make sure that the 2D array has some minimum number of features\n (columns). The default value of 1 rejects empty datasets.\n This check is only enforced when X has effectively 2 dimensions or\n is originally 1D and ``ensure_2d`` is True. Setting to 0 disables\n this check.\n\ny_numeric : bool, default=False\n Whether to ensure that y has a numeric type. If dtype of y is object,\n it is converted to float64. Should only be used for regression\n algorithms.\n\nestimator : str or estimator instance, default=None\n If passed, include the name of the estimator in warning messages.\n\nReturns\n-------\nX_converted : object\n The converted and validated X.\n\ny_converted : object\n The converted and validated y." 
- }, - { - "name": "column_or_1d", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "warn", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "To control display of warnings." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Ravel column or 1d numpy array, else raises an error.\n\nParameters\n----------\ny : array-like\n\nwarn : bool, default=False\n To control display of warnings.\n\nReturns\n-------\ny : ndarray" - }, - { - "name": "check_random_state", - "decorators": [], - "parameters": [ - { - "name": "seed", - "type": "Optional[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If seed is None, return the RandomState singleton used by np.random. If seed is an int, return a new RandomState instance seeded with seed. If seed is already a RandomState instance, return it. Otherwise raise ValueError." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Turn seed into a np.random.RandomState instance\n\nParameters\n----------\nseed : None, int or instance of RandomState\n If seed is None, return the RandomState singleton used by np.random.\n If seed is an int, return a new RandomState instance seeded with seed.\n If seed is already a RandomState instance, return it.\n Otherwise raise ValueError." - }, - { - "name": "has_fit_parameter", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An estimator to inspect." - }, - { - "name": "parameter", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The searched parameter." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Checks whether the estimator's fit method supports the given parameter.\n\nParameters\n----------\nestimator : object\n An estimator to inspect.\n\nparameter : str\n The searched parameter.\n\nReturns\n-------\nis_parameter: bool\n Whether the parameter was found to be a named parameter of the\n estimator's fit method.\n\nExamples\n--------\n>>> from sklearn.svm import SVC\n>>> has_fit_parameter(SVC(), \"sample_weight\")\nTrue" - }, - { - "name": "check_symmetric", - "decorators": [], - "parameters": [ - { - "name": "array", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input object to check / convert. Must be two-dimensional and square, otherwise a ValueError will be raised." - }, - { - "name": "tol", - "type": "float", - "hasDefault": true, - "default": "1e-10", - "limitation": null, - "ignored": false, - "docstring": "Absolute tolerance for equivalence of arrays. Default = 1E-10." - }, - { - "name": "raise_warning", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True then raise a warning if conversion is required." - }, - { - "name": "raise_exception", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True then raise an exception if array is not symmetric." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Make sure that array is 2D, square and symmetric.\n\nIf the array is not symmetric, then a symmetrized version is returned.\nOptionally, a warning or exception is raised if the matrix is not\nsymmetric.\n\nParameters\n----------\narray : {ndarray, sparse matrix}\n Input object to check / convert. 
Must be two-dimensional and square,\n otherwise a ValueError will be raised.\n\ntol : float, default=1e-10\n Absolute tolerance for equivalence of arrays. Default = 1E-10.\n\nraise_warning : bool, default=True\n If True then raise a warning if conversion is required.\n\nraise_exception : bool, default=False\n If True then raise an exception if array is not symmetric.\n\nReturns\n-------\narray_sym : {ndarray, sparse matrix}\n Symmetrized version of the input array, i.e. the average of array\n and array.transpose(). If sparse, then duplicate entries are first\n summed and zeros are eliminated." - }, - { - "name": "check_is_fitted", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "estimator instance for which the check is performed." - }, - { - "name": "attributes", - "type": "Union[List, str, Tuple[str]]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Attribute name(s) given as string or a list/tuple of strings Eg.: ``[\"coef_\", \"estimator_\", ...], \"coef_\"`` If `None`, `estimator` is considered fitted if there exist an attribute that ends with a underscore and does not start with double underscore." - }, - { - "name": "msg", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The default error message is, \"This %(name)s instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.\" For custom messages if \"%(name)s\" is present in the message string, it is substituted for the estimator name. Eg. : \"Estimator, %(name)s, must be fitted before sparsifying\"." - }, - { - "name": "all_or_any", - "type": "Any", - "hasDefault": true, - "default": "all", - "limitation": null, - "ignored": false, - "docstring": "Specify whether all or any of the given attributes must exist." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Perform is_fitted validation for estimator.\n\nChecks if the estimator is fitted by verifying the presence of\nfitted attributes (ending with a trailing underscore) and otherwise\nraises a NotFittedError with the given message.\n\nThis utility is meant to be used internally by estimators themselves,\ntypically in their own predict / transform methods.\n\nParameters\n----------\nestimator : estimator instance\n estimator instance for which the check is performed.\n\nattributes : str, list or tuple of str, default=None\n Attribute name(s) given as string or a list/tuple of strings\n Eg.: ``[\"coef_\", \"estimator_\", ...], \"coef_\"``\n\n If `None`, `estimator` is considered fitted if there exist an\n attribute that ends with a underscore and does not start with double\n underscore.\n\nmsg : str, default=None\n The default error message is, \"This %(name)s instance is not fitted\n yet. Call 'fit' with appropriate arguments before using this\n estimator.\"\n\n For custom messages if \"%(name)s\" is present in the message string,\n it is substituted for the estimator name.\n\n Eg. : \"Estimator, %(name)s, must be fitted before sparsifying\".\n\nall_or_any : callable, {all, any}, default=all\n Specify whether all or any of the given attributes must exist.\n\nReturns\n-------\nNone\n\nRaises\n------\nNotFittedError\n If the attributes are not found." - }, - { - "name": "check_non_negative", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "whom", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Who passed X to this function." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check if there is any negative value in an array.\n\nParameters\n----------\nX : {array-like, sparse matrix}\n Input data.\n\nwhom : str\n Who passed X to this function." - }, - { - "name": "check_scalar", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The scalar parameter to validate." - }, - { - "name": "name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The name of the parameter to be printed in error messages." - }, - { - "name": "target_type", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Acceptable data types for the parameter." - }, - { - "name": "min_val", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The minimum valid value the parameter can take. If None (default) it is implied that the parameter does not have a lower bound." - }, - { - "name": "max_val", - "type": "Union[int, float]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum valid value the parameter can take. If None (default) it is implied that the parameter does not have an upper bound." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate scalar parameters type and value.\n\nParameters\n----------\nx : object\n The scalar parameter to validate.\n\nname : str\n The name of the parameter to be printed in error messages.\n\ntarget_type : type or tuple\n Acceptable data types for the parameter.\n\nmin_val : float or int, default=None\n The minimum valid value the parameter can take. 
If None (default) it\n is implied that the parameter does not have a lower bound.\n\nmax_val : float or int, default=None\n The maximum valid value the parameter can take. If None (default) it\n is implied that the parameter does not have an upper bound.\n\nRaises\n-------\nTypeError\n If the parameter's type does not match the desired type.\n\nValueError\n If the parameter's value violates the given bounds." - }, - { - "name": "_check_psd_eigenvalues", - "decorators": [], - "parameters": [ - { - "name": "lambdas", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Array of eigenvalues to check / fix." - }, - { - "name": "enable_warnings", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "When this is set to ``True``, a ``PositiveSpectrumWarning`` will be raised when there are imaginary parts, negative eigenvalues, or extremely small non-zero eigenvalues. Otherwise no warning will be raised. In both cases, imaginary parts, negative eigenvalues, and extremely small non-zero eigenvalues will be set to zero." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check the eigenvalues of a positive semidefinite (PSD) matrix.\n\nChecks the provided array of PSD matrix eigenvalues for numerical or\nconditioning issues and returns a fixed validated version. This method\nshould typically be used if the PSD matrix is user-provided (e.g. a\nGram matrix) or computed using a user-provided dissimilarity metric\n(e.g. kernel function), or if the decomposition process uses approximation\nmethods (randomized SVD, etc.).\n\nIt checks for three things:\n\n- that there are no significant imaginary parts in eigenvalues (more than\n 1e-5 times the maximum real part). If this check fails, it raises a\n ``ValueError``. Otherwise all non-significant imaginary parts that may\n remain are set to zero. 
This operation is traced with a\n ``PositiveSpectrumWarning`` when ``enable_warnings=True``.\n\n- that eigenvalues are not all negative. If this check fails, it raises a\n ``ValueError``\n\n- that there are no significant negative eigenvalues with absolute value\n more than 1e-10 (1e-6) and more than 1e-5 (5e-3) times the largest\n positive eigenvalue in double (simple) precision. If this check fails,\n it raises a ``ValueError``. Otherwise all negative eigenvalues that may\n remain are set to zero. This operation is traced with a\n ``PositiveSpectrumWarning`` when ``enable_warnings=True``.\n\nFinally, all the positive eigenvalues that are too small (with a value\nsmaller than the maximum eigenvalue multiplied by 1e-12 (2e-7)) are set to\nzero. This operation is traced with a ``PositiveSpectrumWarning`` when\n``enable_warnings=True``.\n\nParameters\n----------\nlambdas : array-like of shape (n_eigenvalues,)\n Array of eigenvalues to check / fix.\n\nenable_warnings : bool, default=False\n When this is set to ``True``, a ``PositiveSpectrumWarning`` will be\n raised when there are imaginary parts, negative eigenvalues, or\n extremely small non-zero eigenvalues. Otherwise no warning will be\n raised. In both cases, imaginary parts, negative eigenvalues, and\n extremely small non-zero eigenvalues will be set to zero.\n\nReturns\n-------\nlambdas_fixed : ndarray of shape (n_eigenvalues,)\n A fixed validated copy of the array of eigenvalues.\n\nExamples\n--------\n>>> _check_psd_eigenvalues([1, 2]) # nominal case\narray([1, 2])\n>>> _check_psd_eigenvalues([5, 5j]) # significant imag part\nTraceback (most recent call last):\n ...\nValueError: There are significant imaginary parts in eigenvalues (1\n of the maximum real part). 
Either the matrix is not PSD, or there was\n an issue while computing the eigendecomposition of the matrix.\n>>> _check_psd_eigenvalues([5, 5e-5j]) # insignificant imag part\narray([5., 0.])\n>>> _check_psd_eigenvalues([-5, -1]) # all negative\nTraceback (most recent call last):\n ...\nValueError: All eigenvalues are negative (maximum is -1). Either the\n matrix is not PSD, or there was an issue while computing the\n eigendecomposition of the matrix.\n>>> _check_psd_eigenvalues([5, -1]) # significant negative\nTraceback (most recent call last):\n ...\nValueError: There are significant negative eigenvalues (0.2 of the\n maximum positive). Either the matrix is not PSD, or there was an issue\n while computing the eigendecomposition of the matrix.\n>>> _check_psd_eigenvalues([5, -5e-5]) # insignificant negative\narray([5., 0.])\n>>> _check_psd_eigenvalues([5, 4e-12]) # bad conditioning (too small)\narray([5., 0.])" - }, - { - "name": "_check_sample_weight", - "decorators": [], - "parameters": [ - { - "name": "sample_weight", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input sample weights." - }, - { - "name": "X", - "type": "Union[NDArray, List]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data." - }, - { - "name": "dtype: dtype", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "dtype of the validated `sample_weight`. If None, and the input `sample_weight` is an array, the dtype of the input is preserved; otherwise an array with the default numpy dtype is be allocated. If `dtype` is not one of `float32`, `float64`, `None`, the output will be of dtype `float64`." - }, - { - "name": "default=None", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "dtype of the validated `sample_weight`. 
If None, and the input `sample_weight` is an array, the dtype of the input is preserved; otherwise an array with the default numpy dtype is be allocated. If `dtype` is not one of `float32`, `float64`, `None`, the output will be of dtype `float64`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Validate sample weights.\n\nNote that passing sample_weight=None will output an array of ones.\nTherefore, in some cases, you may want to protect the call with:\nif sample_weight is not None:\n sample_weight = _check_sample_weight(...)\n\nParameters\n----------\nsample_weight : {ndarray, Number or None}, shape (n_samples,)\n Input sample weights.\n\nX : {ndarray, list, sparse matrix}\n Input data.\n\ndtype: dtype, default=None\n dtype of the validated `sample_weight`.\n If None, and the input `sample_weight` is an array, the dtype of the\n input is preserved; otherwise an array with the default numpy dtype\n is be allocated. If `dtype` is not one of `float32`, `float64`,\n `None`, the output will be of dtype `float64`.\n\nReturns\n-------\nsample_weight : ndarray of shape (n_samples,)\n Validated sample weight. It is guaranteed to be \"C\" contiguous." - }, - { - "name": "_allclose_dense_sparse", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "First array to compare." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Second array to compare." - }, - { - "name": "rtol", - "type": "float", - "hasDefault": true, - "default": "1e-7", - "limitation": null, - "ignored": false, - "docstring": "Relative tolerance; see numpy.allclose." - }, - { - "name": "atol", - "type": "float", - "hasDefault": true, - "default": "1e-9", - "limitation": null, - "ignored": false, - "docstring": "absolute tolerance; see numpy.allclose. 
Note that the default here is more tolerant than the default for numpy.testing.assert_allclose, where atol=0." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check allclose for sparse and dense data.\n\nBoth x and y need to be either sparse or dense, they\ncan't be mixed.\n\nParameters\n----------\nx : {array-like, sparse matrix}\n First array to compare.\n\ny : {array-like, sparse matrix}\n Second array to compare.\n\nrtol : float, default=1e-7\n Relative tolerance; see numpy.allclose.\n\natol : float, default=1e-9\n absolute tolerance; see numpy.allclose. Note that the default here is\n more tolerant than the default for numpy.testing.assert_allclose, where\n atol=0." - }, - { - "name": "_check_fit_params", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data array." - }, - { - "name": "fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Dictionary containing the parameters passed at fit." - }, - { - "name": "indices", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indices to be selected if the parameter has the same size as `X`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check and validate the parameters passed during `fit`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data array.\n\nfit_params : dict\n Dictionary containing the parameters passed at fit.\n\nindices : array-like of shape (n_samples,), default=None\n Indices to be selected if the parameter has the same size as `X`.\n\nReturns\n-------\nfit_params_validated : dict\n Validated parameters. We ensure that the values support indexing." 
- } - ] - }, - { - "name": "sklearn.utils._arpack", - "imports": [ - "from validation import check_random_state" - ], - "classes": [], - "functions": [ - { - "name": "_init_arpack_v0", - "decorators": [], - "parameters": [ - { - "name": "size", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The size of the eigenvalue vector to be initialized." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The seed of the pseudo random number generator used to generate a uniform distribution. If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Initialize the starting vector for iteration in ARPACK functions.\n\nInitialize a ndarray with values sampled from the uniform distribution on\n[-1, 1]. This initialization model has been chosen to be consistent with\nthe ARPACK one as another initialization can lead to convergence issues.\n\nParameters\n----------\nsize : int\n The size of the eigenvalue vector to be initialized.\n\nrandom_state : int, RandomState instance or None, default=None\n The seed of the pseudo random number generator used to generate a\n uniform distribution. If int, random_state is the seed used by the\n random number generator; If RandomState instance, random_state is the\n random number generator; If None, the random number generator is the\n RandomState instance used by `np.random`.\n\nReturns\n-------\nv0 : ndarray of shape (size,)\n The initialized vector." 
- } - ] - }, - { - "name": "sklearn.utils._encode", - "imports": [ - "from typing import NamedTuple", - "import numpy as np", - "from None import is_scalar_nan" - ], - "classes": [ - { - "name": "MissingValues", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "to_list", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convert tuple to a list where None is always first." - } - ], - "docstring": "Data class for missing data information" - }, - { - "name": "_nandict", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__missing__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dictionary with support for nans." - } - ], - "functions": [ - { - "name": "_unique", - "decorators": [], - "parameters": [ - { - "name": "values", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values to check for unknowns." - }, - { - "name": "return_inverse", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, also return the indices of the unique values." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper function to find unique values with support for python objects.\n\nUses pure python method for object dtype, and numpy method for\nall other dtypes.\n\nParameters\n----------\nvalues : ndarray\n Values to check for unknowns.\n\nreturn_inverse : bool, default=False\n If True, also return the indices of the unique values.\n\nReturns\n-------\nunique : ndarray\n The sorted unique values.\n\nunique_inverse : ndarray\n The indices to reconstruct the original array from the unique array.\n Only provided if `return_inverse` is True." - }, - { - "name": "_extract_missing", - "decorators": [], - "parameters": [ - { - "name": "values: set", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Set of values to extract missing from." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Extract missing values from `values`.\n\nParameters\n----------\nvalues: set\n Set of values to extract missing from.\n\nReturns\n-------\noutput: set\n Set with missing values extracted.\n\nmissing_values: MissingValues\n Object with missing value information." - }, - { - "name": "_map_to_integer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Map values based on its position in uniques." - }, - { - "name": "_unique_python", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_encode", - "decorators": [], - "parameters": [ - { - "name": "values", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values to encode." - }, - { - "name": "uniques", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The unique values in `values`. 
If the dtype is not object, then `uniques` needs to be sorted." - }, - { - "name": "check_unknown", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If True, check for values in `values` that are not in `unique` and raise an error. This is ignored for object dtype, and treated as True in this case. This parameter is useful for _BaseEncoder._transform() to avoid calling _check_unknown() twice." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper function to encode values into [0, n_uniques - 1].\n\nUses pure python method for object dtype, and numpy method for\nall other dtypes.\nThe numpy method has the limitation that the `uniques` need to\nbe sorted. Importantly, this is not checked but assumed to already be\nthe case. The calling method needs to ensure this for all non-object\nvalues.\n\nParameters\n----------\nvalues : ndarray\n Values to encode.\nuniques : ndarray\n The unique values in `values`. If the dtype is not object, then\n `uniques` needs to be sorted.\ncheck_unknown : bool, default=True\n If True, check for values in `values` that are not in `unique`\n and raise an error. This is ignored for object dtype, and treated as\n True in this case. This parameter is useful for\n _BaseEncoder._transform() to avoid calling _check_unknown()\n twice.\n\nReturns\n-------\nencoded : ndarray\n Encoded values" - }, - { - "name": "_check_unknown", - "decorators": [], - "parameters": [ - { - "name": "values", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Values to check for unknowns." - }, - { - "name": "known_values", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Known values. Must be unique." 
- }, - { - "name": "return_mask", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, return a mask of the same shape as `values` indicating the valid values." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper function to check for unknowns in values to be encoded.\n\nUses pure python method for object dtype, and numpy method for\nall other dtypes.\n\nParameters\n----------\nvalues : array\n Values to check for unknowns.\nknown_values : array\n Known values. Must be unique.\nreturn_mask : bool, default=False\n If True, return a mask of the same shape as `values` indicating\n the valid values.\n\nReturns\n-------\ndiff : list\n The unique values present in `values` and not in `know_values`.\nvalid_mask : boolean array\n Additionally returned if ``return_mask=True``." - } - ] - }, - { - "name": "sklearn.utils._estimator_html_repr", - "imports": [ - "from contextlib import closing", - "from contextlib import suppress", - "from io import StringIO", - "import uuid", - "import html", - "from sklearn import config_context" - ], - "classes": [ - { - "name": "_VisualBlock", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "kind", - "type": "Literal['serial', 'parallel', 'single']", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "kind of HTML block" - }, - { - "name": "estimators", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If kind != 'single', then `estimators` is a list of estimators. If kind == 'single', then `estimators` is a single estimator." 
- }, - { - "name": "names", - "type": "List[str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If kind != 'single', then `names` corresponds to estimators. If kind == 'single', then `names` is a single string corresponding to the single estimator." - }, - { - "name": "name_details", - "type": "Union[List[str], str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If kind != 'single', then `name_details` corresponds to `names`. If kind == 'single', then `name_details` is a single string corresponding to the single estimator." - }, - { - "name": "dash_wrapped", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "If true, wrapped HTML element will be wrapped with a dashed border. Only active when kind != 'single'." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_sk_visual_block_", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "HTML Representation of Estimator\n\nParameters\n----------\nkind : {'serial', 'parallel', 'single'}\n kind of HTML block\n\nestimators : list of estimators or `_VisualBlock`s or a single estimator\n If kind != 'single', then `estimators` is a list of\n estimators.\n If kind == 'single', then `estimators` is a single estimator.\n\nnames : list of str, default=None\n If kind != 'single', then `names` corresponds to estimators.\n If kind == 'single', then `names` is a single string corresponding to\n the single estimator.\n\nname_details : list of str, str, or None, default=None\n If kind != 'single', then `name_details` corresponds to `names`.\n If kind == 'single', then `name_details` is a single string\n corresponding to the single estimator.\n\ndash_wrapped : bool, default=True\n If true, wrapped HTML element will be wrapped with a 
dashed border.\n Only active when kind != 'single'." - } - ], - "functions": [ - { - "name": "_write_label_html", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Write labeled html with or without a dropdown with named details" - }, - { - "name": "_get_visual_block", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate information about how to display an estimator.\n " - }, - { - "name": "_write_estimator_html", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Write estimator to html in serial, parallel, or by itself (single).\n " - }, - { - "name": "estimator_html_repr", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator to visualize." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Build a HTML representation of an estimator.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object\n The estimator to visualize.\n\nReturns\n-------\nhtml: str\n HTML representation of estimator." 
- } - ] - }, - { - "name": "sklearn.utils._joblib", - "imports": [ - "import warnings as _warnings", - "import joblib", - "from joblib import logger", - "from joblib import dump", - "from joblib import load", - "from joblib import __version__", - "from joblib import effective_n_jobs", - "from joblib import hash", - "from joblib import cpu_count", - "from joblib import Parallel", - "from joblib import Memory", - "from joblib import delayed", - "from joblib import parallel_backend", - "from joblib import register_parallel_backend" - ], - "classes": [], - "functions": [] - }, - { - "name": "sklearn.utils._mask", - "imports": [ - "import numpy as np", - "from scipy import sparse as sp", - "from None import is_scalar_nan", - "from fixes import _object_dtype_isnan" - ], - "classes": [], - "functions": [ - { - "name": "_get_dense_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_get_mask", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where ``n_samples`` is the number of samples and ``n_features`` is the number of features." - }, - { - "name": "value_to_mask", - "type": "Union[float, int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The value which is to be masked in X." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the boolean mask X == value_to_mask.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n\nvalue_to_mask : {int, float}\n The value which is to be masked in X.\n\nReturns\n-------\nX_mask : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Missing mask." 
- } - ] - }, - { - "name": "sklearn.utils._mocking", - "imports": [ - "import numpy as np", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from validation import _num_samples", - "from validation import check_array", - "from validation import check_is_fitted" - ], - "classes": [ - { - "name": "ArraySlicingWrapper", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "array", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__getitem__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Parameters\n----------\narray" - }, - { - "name": "MockDataFrame", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "array", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__len__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__array__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__eq__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__ne__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Parameters\n----------\narray" - }, - { - "name": "CheckingClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": 
"__init__", - "decorators": [], - "parameters": [ - { - "name": "check_y", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The callable used to validate `X` and `y`. These callable should return a bool where `False` will trigger an `AssertionError`." - }, - { - "name": "check_X", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The callable used to validate `X` and `y`. These callable should return a bool where `False` will trigger an `AssertionError`." - }, - { - "name": "check_y_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The optional parameters to pass to `check_X` and `check_y`." - }, - { - "name": "check_X_params", - "type": "Dict", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The optional parameters to pass to `check_X` and `check_y`." - }, - { - "name": "methods_to_check", - "type": "Union[List[str], Literal[\"all\"]]", - "hasDefault": true, - "default": "\"all\"", - "limitation": null, - "ignored": false, - "docstring": "The methods in which the checks should be applied. By default, all checks will be done on all methods (`fit`, `predict`, `predict_proba`, `decision_function` and `score`)." - }, - { - "name": "foo_param", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "A `foo` param. When `foo > 1`, the output of :meth:`score` will be 1 otherwise it is 0." - }, - { - "name": "expected_fit_params", - "type": "List[str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A list of the expected parameters given when calling `fit`." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_check_X_y", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The data set." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The corresponding target, by default None." - }, - { - "name": "should_be_fitted", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not the classifier should be already fitted. By default True." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Validate X and y and make extra check.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data set.\ny : array-like of shape (n_samples), default=None\n The corresponding target, by default None.\nshould_be_fitted : bool, default=True\n Whether or not the classifier should be already fitted.\n By default True.\n\nReturns\n-------\nX, y" - }, - { - "name": "fit", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target relative to X for classification or regression; None for unsupervised learning." 
- }, - { - "name": "**fit_params", - "type": "Dict", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters passed to the ``fit`` method of the estimator" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Fit classifier.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples, n_outputs) or (n_samples,), default=None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of the estimator\n\nReturns\n-------\nself" - }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict the first class seen in `classes_`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\npreds : ndarray of shape (n_samples,)\n Predictions of the first class seens in `classes_`." - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Predict probabilities for each class.\n\nHere, the dummy classifier will provide a probability of 1 for the\nfirst class of `classes_` and 0 otherwise.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\nproba : ndarray of shape (n_samples, n_classes)\n The probabilities for each sample and class." - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The input data." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Confidence score.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\ndecision : ndarray of shape (n_samples,) if n_classes == 2 else (n_samples, n_classes)\n Confidence score." - }, - { - "name": "score", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Input data, where n_samples is the number of samples and n_features is the number of features." - }, - { - "name": "Y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target relative to X for classification or regression; None for unsupervised learning." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Fake score.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data, where n_samples is the number of samples and\n n_features is the number of features.\n\nY : array-like of shape (n_samples, n_output) or (n_samples,)\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\nReturns\n-------\nscore : float\n Either 0 or 1 depending of `foo_param` (i.e. `foo_param > 1 =>\n score=1` otherwise `score=0`)." - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy classifier to test pipelining and meta-estimators.\n\nChecks some property of `X` and `y`in fit / predict.\nThis allows testing whether pipelines / cross-validation or metaestimators\nchanged the input.\n\nCan also be used to check if `fit_params` are passed correctly, and\nto force a certain score to be returned.\n\nParameters\n----------\ncheck_y, check_X : callable, default=None\n The callable used to validate `X` and `y`. These callable should return\n a bool where `False` will trigger an `AssertionError`.\n\ncheck_y_params, check_X_params : dict, default=None\n The optional parameters to pass to `check_X` and `check_y`.\n\nmethods_to_check : \"all\" or list of str, default=\"all\"\n The methods in which the checks should be applied. By default,\n all checks will be done on all methods (`fit`, `predict`,\n `predict_proba`, `decision_function` and `score`).\n\nfoo_param : int, default=0\n A `foo` param. 
When `foo > 1`, the output of :meth:`score` will be 1\n otherwise it is 0.\n\nexpected_fit_params : list of str, default=None\n A list of the expected parameters given when calling `fit`.\n\nAttributes\n----------\nclasses_ : int\n The classes seen during `fit`.\n\nn_features_in_ : int\n The number of features seen during `fit`.\n\nExamples\n--------\n>>> from sklearn.utils._mocking import CheckingClassifier\n\nThis helper allow to assert to specificities regarding `X` or `y`. In this\ncase we expect `check_X` or `check_y` to return a boolean.\n\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = CheckingClassifier(check_X=lambda x: x.shape == (150, 4))\n>>> clf.fit(X, y)\nCheckingClassifier(...)\n\nWe can also provide a check which might raise an error. In this case, we\nexpect `check_X` to return `X` and `check_y` to return `y`.\n\n>>> from sklearn.utils import check_array\n>>> clf = CheckingClassifier(check_X=check_array)\n>>> clf.fit(X, y)\nCheckingClassifier(...)" - }, - { - "name": "NoSampleWeightWrapper", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "est", - "type": null, - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator to wrap." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Wrap estimator which will not expose `sample_weight`.\n\nParameters\n----------\nest : estimator, default=None\n The estimator to wrap." - } - ], - "functions": [] - }, - { - "name": "sklearn.utils._pprint", - "imports": [ - "import inspect", - "import pprint", - "from collections import OrderedDict", - "from base import BaseEstimator", - "from _config import get_config", - "from None import is_scalar_nan" - ], - "classes": [ - { - "name": "KeyValTuple", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Dummy class for correctly rendering key-value tuples from dicts." - }, - { - "name": "KeyValTupleParam", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Dummy class for correctly rendering key-value tuples from parameters." 
- }, - { - "name": "_EstimatorPrettyPrinter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_pprint_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_format_dict_items", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_format_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_format_params_or_dict_items", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Format dict items or parameters respecting the compact=True\nparameter. For some reason, the builtin rendering of dict items doesn't\nrespect compact=True and will use one line per key-value if all cannot\nfit in a single line.\nDict items will be rendered as <'key': value> while params will be\nrendered as . The implementation is mostly copy/pasting from\nthe builtin _format_items().\nThis also adds ellipsis if the number of items is greater than\nself.n_max_elements_to_show." - }, - { - "name": "_format_items", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Format the items of an iterable (list, tuple...). Same as the\nbuilt-in _format_items, with support for ellipsis if the number of\nelements is greater than self.n_max_elements_to_show." - }, - { - "name": "_pprint_key_val_tuple", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Pretty printing for key-value tuples from dict or parameters." 
- } - ], - "docstring": "Pretty Printer class for estimator objects.\n\nThis extends the pprint.PrettyPrinter class, because:\n- we need estimators to be printed with their parameters, e.g.\n Estimator(param1=value1, ...) which is not supported by default.\n- the 'compact' parameter of PrettyPrinter is ignored for dicts, which\n may lead to very long representations that we want to avoid.\n\nQuick overview of pprint.PrettyPrinter (see also\nhttps://stackoverflow.com/questions/49565047/pprint-with-hex-numbers):\n\n- the entry point is the _format() method which calls format() (overridden\n here)\n- format() directly calls _safe_repr() for a first try at rendering the\n object\n- _safe_repr formats the whole object reccursively, only calling itself,\n not caring about line length or anything\n- back to _format(), if the output string is too long, _format() then calls\n the appropriate _pprint_TYPE() method (e.g. _pprint_list()) depending on\n the type of the object. This where the line length and the compact\n parameters are taken into account.\n- those _pprint_TYPE() methods will internally use the format() method for\n rendering the nested objects of an object (e.g. the elements of a list)\n\nIn the end, everything has to be implemented twice: in _safe_repr and in\nthe custom _pprint_TYPE methods. 
Unfortunately PrettyPrinter is really not\nstraightforward to extend (especially when we want a compact output), so\nthe code is a bit convoluted.\n\nThis class overrides:\n- format() to support the changed_only parameter\n- _safe_repr to support printing of estimators (for when they fit on a\n single line)\n- _format_dict_items so that dict are correctly 'compacted'\n- _format_items so that ellipsis is used on long lists and tuples\n\nWhen estimators cannot be printed on a single line, the builtin _format()\nwill call _pprint_estimator() because it was registered to do so (see\n_dispatch[BaseEstimator.__repr__] = _pprint_estimator).\n\nboth _format_dict_items() and _pprint_estimator() use the\n_format_params_or_dict_items() method that will format parameters and\nkey-value pairs respecting the compact parameter. This method needs another\nsubroutine _pprint_key_val_tuple() used when a parameter or a key-value\npair is too long to fit on a single line. This subroutine is called in\n_format() and is registered as well in the _dispatch dict (just like\n_pprint_estimator). We had to create the two classes KeyValTuple and\nKeyValTupleParam for this." - } - ], - "functions": [ - { - "name": "_changed_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Return dict (param_name: value) of parameters that were given to\nestimator with non-default values." - }, - { - "name": "_safe_repr", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Same as the builtin _safe_repr, with added support for Estimator\nobjects." 
- } - ] - }, - { - "name": "sklearn.utils._show_versions", - "imports": [ - "import platform", - "import sys", - "import importlib", - "from _openmp_helpers import _openmp_parallelism_enabled" - ], - "classes": [], - "functions": [ - { - "name": "_get_sys_info", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "System information\n\nReturns\n-------\nsys_info : dict\n system and Python version information" - }, - { - "name": "_get_deps_info", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": null, - "docstring": "Overview of the installed version of main dependencies\n\nReturns\n-------\ndeps_info: dict\n version information on relevant Python libraries" - }, - { - "name": "show_versions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Print useful debugging information\"\n\n.. versionadded:: 0.20" - } - ] - }, - { - "name": "sklearn.utils._tags", - "imports": [ - "import numpy as np" - ], - "classes": [], - "functions": [ - { - "name": "_safe_tags", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator from which to get the tag." - }, - { - "name": "key", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Tag name to get. By default (`None`), all tags are returned." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Safely get estimator tags.\n\n:class:`~sklearn.BaseEstimator` provides the estimator tags machinery.\nHowever, if an estimator does not inherit from this base class, we should\nfall-back to the default tags.\n\nFor scikit-learn built-in estimators, we should still rely on\n`self._get_tags()`. 
`_safe_tags(est)` should be used when we are not sure\nwhere `est` comes from: typically `_safe_tags(self.base_estimator)` where\n`self` is a meta-estimator, or in the common checks.\n\nParameters\n----------\nestimator : estimator object\n The estimator from which to get the tag.\n\nkey : str, default=None\n Tag name to get. By default (`None`), all tags are returned.\n\nReturns\n-------\ntags : dict or tag value\n The estimator tags. A single value is returned if `key` is not None." - } - ] - }, - { - "name": "sklearn.utils._testing", - "imports": [ - "import os", - "import os.path as op", - "import inspect", - "import warnings", - "import sys", - "import functools", - "import tempfile", - "from subprocess import check_output", - "from subprocess import STDOUT", - "from subprocess import CalledProcessError", - "from subprocess import TimeoutExpired", - "import re", - "import contextlib", - "from collections.abc import Iterable", - "import scipy as sp", - "from functools import wraps", - "from inspect import signature", - "import shutil", - "import atexit", - "import unittest", - "from unittest import TestCase", - "from numpy.testing import assert_allclose", - "from numpy.testing import assert_almost_equal", - "from numpy.testing import assert_approx_equal", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_less", - "import numpy as np", - "import joblib", - "import sklearn", - "from sklearn.utils import IS_PYPY", - "from sklearn.utils import _IS_32BIT", - "from sklearn.utils.multiclass import check_classification_targets", - "from sklearn.utils.validation import check_array", - "from sklearn.utils.validation import check_is_fitted", - "from sklearn.utils.validation import check_X_y", - "import pytest", - "from numpydoc import docscrape", - "import difflib", - "import pprint", - "from sklearn.metrics import accuracy_score", - "from sklearn.metrics import r2_score" - 
], - "classes": [ - { - "name": "_IgnoreWarnings", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "category", - "type": "Tuple[]", - "hasDefault": true, - "default": "Warning", - "limitation": null, - "ignored": false, - "docstring": "The category to filter. By default, all the categories will be muted." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Decorator to catch and hide warnings without visual nesting." - }, - { - "name": "__repr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__enter__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__exit__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Improved and simplified Python warnings context manager and decorator.\n\nThis class allows the user to ignore the warnings raised by a function.\nCopied from Python 2.7.5 and modified as required.\n\nParameters\n----------\ncategory : tuple of warning class, default=Warning\n The category to filter. By default, all the categories will be muted." 
- }, - { - "name": "TempMemmap", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "data", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "mmap_mode", - "type": "str", - "hasDefault": true, - "default": "'r'", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__enter__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__exit__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Parameters\n----------\ndata\nmmap_mode : str, default='r'" - }, - { - "name": "_Raises", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__exit__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "MinimalClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Minimal classifier implementation with inheriting from BaseEstimator.\n\nThis estimator should be tested with:\n\n* `check_estimator` in `test_estimator_checks.py`;\n* within a `Pipeline` in `test_pipeline.py`;\n* within a `SearchCV` in `test_search.py`." - }, - { - "name": "MinimalRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Minimal regressor implementation with inheriting from BaseEstimator.\n\nThis estimator should be tested with:\n\n* `check_estimator` in `test_estimator_checks.py`;\n* within a `Pipeline` in `test_pipeline.py`;\n* within a `SearchCV` in `test_search.py`." 
- }, - { - "name": "MinimalTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "get_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Minimal transformer implementation with inheriting from\nBaseEstimator.\n\nThis estimator should be tested with:\n\n* `check_estimator` in `test_estimator_checks.py`;\n* within a `Pipeline` in `test_pipeline.py`;\n* within a `SearchCV` in `test_search.py`." - } - ], - "functions": [ - { - "name": "assert_warns", - "decorators": [], - "parameters": [ - { - "name": "warning_class", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class to test for, e.g. UserWarning." - }, - { - "name": "func", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Callable object to trigger warnings." 
- }, - { - "name": "*args", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "**kw", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Test that a certain warning occurs.\n\nParameters\n----------\nwarning_class : the warning class\n The class to test for, e.g. UserWarning.\n\nfunc : callable\n Callable object to trigger warnings.\n\n*args : the positional arguments to `func`.\n\n**kw : the keyword arguments to `func`\n\nReturns\n-------\nresult : the return value of `func`" - }, - { - "name": "assert_warns_message", - "decorators": [], - "parameters": [ - { - "name": "warning_class", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The class to test for, e.g. UserWarning." - }, - { - "name": "message", - "type": "Union[Callable, str]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The message or a substring of the message to test for. If callable, it takes a string as the argument and will trigger an AssertionError if the callable returns `False`." - }, - { - "name": "func", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Callable object to trigger warnings." 
- }, - { - "name": "*args", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "**kw", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Test that a certain warning occurs and with a certain message.\n\nParameters\n----------\nwarning_class : the warning class\n The class to test for, e.g. UserWarning.\n\nmessage : str or callable\n The message or a substring of the message to test for. If callable,\n it takes a string as the argument and will trigger an AssertionError\n if the callable returns `False`.\n\nfunc : callable\n Callable object to trigger warnings.\n\n*args : the positional arguments to `func`.\n\n**kw : the keyword arguments to `func`.\n\nReturns\n-------\nresult : the return value of `func`" - }, - { - "name": "assert_warns_div0", - "decorators": [], - "parameters": [ - { - "name": "func", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "*args", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "**kw", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Assume that numpy's warning for divide by zero is raised.\n\nHandles the case of platforms that do not support warning on divide by\nzero.\n\nParameters\n----------\nfunc\n*args\n**kw" - }, - { - "name": "assert_no_warnings", - "decorators": [], - "parameters": [ - { - "name": "func", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "*args", - "type": null, - 
"hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "**kw", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Parameters\n----------\nfunc\n*args\n**kw" - }, - { - "name": "ignore_warnings", - "decorators": [], - "parameters": [ - { - "name": "obj", - "type": "Callable", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "callable where you want to ignore the warnings." - }, - { - "name": "category", - "type": null, - "hasDefault": true, - "default": "Warning", - "limitation": null, - "ignored": false, - "docstring": "The category to filter. If Warning, all categories will be muted." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Context manager and decorator to ignore warnings.\n\nNote: Using this (in both variants) will clear all warnings\nfrom all python modules loaded. In case you need to test\ncross-module-warning-logging, this is not your tool of choice.\n\nParameters\n----------\nobj : callable, default=None\n callable where you want to ignore the warnings.\ncategory : warning class, default=Warning\n The category to filter. If Warning, all categories will be muted.\n\nExamples\n--------\n>>> with ignore_warnings():\n... warnings.warn('buhuhuhu')\n\n>>> def nasty_warn():\n... warnings.warn('buhuhuhu')\n... print(42)\n\n>>> ignore_warnings(nasty_warn)()\n42" - }, - { - "name": "assert_raise_message", - "decorators": [], - "parameters": [ - { - "name": "exceptions", - "type": "Tuple[]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "An Exception object." 
- }, - { - "name": "message", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The error message or a substring of the error message." - }, - { - "name": "function", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Callable object to raise error." - }, - { - "name": "*args", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "**kwargs", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper function to test the message raised in an exception.\n\nGiven an exception, a callable to raise the exception, and\na message string, tests that the correct exception is raised and\nthat the message is a substring of the error thrown. Used to test\nthat the specific message thrown during an exception is correct.\n\nParameters\n----------\nexceptions : exception or tuple of exception\n An Exception object.\n\nmessage : str\n The error message or a substring of the error message.\n\nfunction : callable\n Callable object to raise error.\n\n*args : the positional arguments to `function`.\n\n**kwargs : the keyword arguments to `function`." - }, - { - "name": "assert_allclose_dense_sparse", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "First array to compare." - }, - { - "name": "y", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Second array to compare." 
- }, - { - "name": "rtol", - "type": "float", - "hasDefault": true, - "default": "1e-07", - "limitation": null, - "ignored": false, - "docstring": "relative tolerance; see numpy.allclose." - }, - { - "name": "atol", - "type": "float", - "hasDefault": true, - "default": "1e-9", - "limitation": null, - "ignored": false, - "docstring": "absolute tolerance; see numpy.allclose. Note that the default here is more tolerant than the default for numpy.testing.assert_allclose, where atol=0." - }, - { - "name": "err_msg", - "type": "str", - "hasDefault": true, - "default": "''", - "limitation": null, - "ignored": false, - "docstring": "Error message to raise." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Assert allclose for sparse and dense data.\n\nBoth x and y need to be either sparse or dense, they\ncan't be mixed.\n\nParameters\n----------\nx : {array-like, sparse matrix}\n First array to compare.\n\ny : {array-like, sparse matrix}\n Second array to compare.\n\nrtol : float, default=1e-07\n relative tolerance; see numpy.allclose.\n\natol : float, default=1e-9\n absolute tolerance; see numpy.allclose. Note that the default here is\n more tolerant than the default for numpy.testing.assert_allclose, where\n atol=0.\n\nerr_msg : str, default=''\n Error message to raise." - }, - { - "name": "set_random_state", - "decorators": [], - "parameters": [ - { - "name": "estimator", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The estimator." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Pseudo random number generator state. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Set random state of an estimator if it has the `random_state` param.\n\nParameters\n----------\nestimator : object\n The estimator.\nrandom_state : int, RandomState instance or None, default=0\n Pseudo random number generator state.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `." - }, - { - "name": "check_skip_network", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_delete_folder", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Utility function to cleanup a temporary folder if still existing.\n\nCopy from joblib.pool (for independence)." - }, - { - "name": "create_memmap_backed_data", - "decorators": [], - "parameters": [ - { - "name": "data", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "mmap_mode", - "type": "str", - "hasDefault": true, - "default": "'r'", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "return_folder", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Parameters\n----------\ndata\nmmap_mode : str, default='r'\nreturn_folder : bool, default=False" - }, - { - "name": "_get_args", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper to get function arguments." - }, - { - "name": "_get_func_name", - "decorators": [], - "parameters": [ - { - "name": "func", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The function object." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get function full name.\n\nParameters\n----------\nfunc : callable\n The function object.\n\nReturns\n-------\nname : str\n The function name." - }, - { - "name": "check_docstring_parameters", - "decorators": [], - "parameters": [ - { - "name": "func", - "type": "Callable", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The function object to test." - }, - { - "name": "doc", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Docstring if it is passed manually to the test." - }, - { - "name": "ignore", - "type": "List", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameters to ignore." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Helper to check docstring.\n\nParameters\n----------\nfunc : callable\n The function object to test.\ndoc : str, default=None\n Docstring if it is passed manually to the test.\nignore : list, default=None\n Parameters to ignore.\n\nReturns\n-------\nincorrect : list\n A list of string describing the incorrect results." - }, - { - "name": "assert_run_python_script", - "decorators": [], - "parameters": [ - { - "name": "source_code", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The Python source code to execute." - }, - { - "name": "timeout", - "type": "int", - "hasDefault": true, - "default": "60", - "limitation": null, - "ignored": false, - "docstring": "Time in seconds before timeout." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Utility to check assertions in an independent Python subprocess.\n\nThe script provided in the source code should return 0 and not print\nanything on stderr or stdout.\n\nThis is a port from cloudpickle https://github.com/cloudpipe/cloudpickle\n\nParameters\n----------\nsource_code : str\n The Python source code to execute.\ntimeout : int, default=60\n Time in seconds before timeout." - }, - { - "name": "_convert_container", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "raises", - "decorators": [], - "parameters": [ - { - "name": "excepted_exc_type", - "type": "List", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The exception that should be raised by the block. If a list, the block should raise one of the exceptions." - }, - { - "name": "match", - "type": "Union[List[str], str]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "A regex that the exception message should match. If a list, one of the entries must match. If None, match isn't enforced." - }, - { - "name": "may_pass", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True, the block is allowed to not raise an exception. Useful in cases where some estimators may support a feature but others must fail with an appropriate error message. By default, the context manager will raise an exception if the block does not raise an exception." - }, - { - "name": "err_msg", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If the context manager fails (e.g. the block fails to raise the proper exception, or fails to match), then an AssertionError is raised with this message. 
By default, an AssertionError is raised with a default error message (depends on the kind of failure). Use this to indicate how users should fix their estimators to pass the checks." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Context manager to ensure exceptions are raised within a code block.\n\nThis is similar to and inspired from pytest.raises, but supports a few\nother cases.\n\nThis is only intended to be used in estimator_checks.py where we don't\nwant to use pytest. In the rest of the code base, just use pytest.raises\ninstead.\n\nParameters\n----------\nexcepted_exc_type : Exception or list of Exception\n The exception that should be raised by the block. If a list, the block\n should raise one of the exceptions.\nmatch : str or list of str, default=None\n A regex that the exception message should match. If a list, one of\n the entries must match. If None, match isn't enforced.\nmay_pass : bool, default=False\n If True, the block is allowed to not raise an exception. Useful in\n cases where some estimators may support a feature but others must\n fail with an appropriate error message. By default, the context\n manager will raise an exception if the block does not raise an\n exception.\nerr_msg : str, default=None\n If the context manager fails (e.g. the block fails to raise the\n proper exception, or fails to match), then an AssertionError is\n raised with this message. By default, an AssertionError is raised\n with a default error message (depends on the kind of failure). Use\n this to indicate how users should fix their estimators to pass the\n checks.\n\nAttributes\n----------\nraised_and_matched : bool\n True if an exception was raised and a match was found, False otherwise." 
- } - ] - }, - { - "name": "sklearn.utils", - "imports": [ - "import pkgutil", - "import inspect", - "from importlib import import_module", - "from operator import itemgetter", - "from collections.abc import Sequence", - "from contextlib import contextmanager", - "from itertools import compress", - "from itertools import islice", - "import numbers", - "import platform", - "import struct", - "import timeit", - "from pathlib import Path", - "import warnings", - "import numpy as np", - "from scipy.sparse import issparse", - "from murmurhash import murmurhash3_32", - "from class_weight import compute_class_weight", - "from class_weight import compute_sample_weight", - "from None import _joblib", - "from exceptions import DataConversionWarning", - "from deprecation import deprecated", - "from fixes import np_version", - "from fixes import parse_version", - "from _estimator_html_repr import estimator_html_repr", - "from validation import as_float_array", - "from validation import assert_all_finite", - "from validation import check_random_state", - "from validation import column_or_1d", - "from validation import check_array", - "from validation import check_consistent_length", - "from validation import check_X_y", - "from validation import indexable", - "from validation import check_symmetric", - "from validation import check_scalar", - "from validation import _deprecate_positional_args", - "from None import get_config", - "import matplotlib", - "import pandas", - "from _testing import ignore_warnings", - "from base import BaseEstimator", - "from base import ClassifierMixin", - "from base import RegressorMixin", - "from base import TransformerMixin", - "from base import ClusterMixin" - ], - "classes": [ - { - "name": "Bunch", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__setattr__", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__dir__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__getattr__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__setstate__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "Container object exposing keys as attributes.\n\nBunch objects are sometimes used as an output for functions and methods.\nThey extend dictionaries by enabling values to be accessed by key,\n`bunch[\"value_key\"]`, or by an attribute, `bunch.value_key`.\n\nExamples\n--------\n>>> b = Bunch(a=1, b=2)\n>>> b['b']\n2\n>>> b.b\n2\n>>> b.a = 3\n>>> b['a']\n3\n>>> b.c = 6\n>>> b['c']\n6" - } - ], - "functions": [ - { - "name": "safe_mask", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data on which to apply mask." - }, - { - "name": "mask", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Mask to be used on X." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Return a mask which is safe to use on X.\n\nParameters\n----------\nX : {array-like, sparse matrix}\n Data on which to apply mask.\n\nmask : ndarray\n Mask to be used on X.\n\nReturns\n-------\n mask" - }, - { - "name": "axis0_safe_slice", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data on which to apply mask." 
- }, - { - "name": "mask", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Mask to be used on X." - }, - { - "name": "len_mask", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The length of the mask." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "This mask is safer than safe_mask since it returns an\nempty array, when a sparse matrix is sliced with a boolean mask\nwith all False, instead of raising an unhelpful error in older\nversions of SciPy.\n\nSee: https://github.com/scipy/scipy/issues/5361\n\nAlso note that we can avoid doing the dot product by checking if\nthe len_mask is not zero in _huber_loss_and_gradient but this\nis not going to be the bottleneck, since the number of outliers\nand non_outliers are typically non-zero and it makes the code\ntougher to follow.\n\nParameters\n----------\nX : {array-like, sparse matrix}\n Data on which to apply mask.\n\nmask : ndarray\n Mask to be used on X.\n\nlen_mask : int\n The length of the mask.\n\nReturns\n-------\n mask" - }, - { - "name": "_array_indexing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Index an array or scipy.sparse consistently across NumPy version." - }, - { - "name": "_pandas_indexing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Index a pandas dataframe or a series." - }, - { - "name": "_list_indexing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Index a Python list." - }, - { - "name": "_determine_key_type", - "decorators": [], - "parameters": [ - { - "name": "key", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The key from which we want to infer the data type." 
- }, - { - "name": "accept_slice", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether or not to raise an error if the key is a slice." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Determine the data type of key.\n\nParameters\n----------\nkey : scalar, slice or array-like\n The key from which we want to infer the data type.\n\naccept_slice : bool, default=True\n Whether or not to raise an error if the key is a slice.\n\nReturns\n-------\ndtype : {'int', 'str', 'bool', None}\n Returns the data type of key." - }, - { - "name": "_safe_indexing", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Union[List, ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Data from which to sample rows, items or columns. `list` are only supported when `axis=0`." - }, - { - "name": "indices", - "type": "Union[int, bool, str, ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "- If `axis=0`, boolean and integer array-like, integer slice, and scalar integer are supported. - If `axis=1`: - to select a single column, `indices` can be of `int` type for all `X` types and `str` only for dataframe. The selected subset will be 1D, unless `X` is a sparse matrix in which case it will be 2D. - to select multiples columns, `indices` can be one of the following: `list`, `array`, `slice`. The type used in these containers can be one of the following: `int`, 'bool' and `str`. However, `str` is only supported when `X` is a dataframe. The selected subset will be 2D." - }, - { - "name": "axis", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The axis along which `X` will be subsampled. `axis=0` will select rows while `axis=1` will select columns." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Return rows, items or columns of X using indices.\n\n.. warning::\n\n This utility is documented, but **private**. This means that\n backward compatibility might be broken without any deprecation\n cycle.\n\nParameters\n----------\nX : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series\n Data from which to sample rows, items or columns. `list` are only\n supported when `axis=0`.\nindices : bool, int, str, slice, array-like\n - If `axis=0`, boolean and integer array-like, integer slice,\n and scalar integer are supported.\n - If `axis=1`:\n - to select a single column, `indices` can be of `int` type for\n all `X` types and `str` only for dataframe. The selected subset\n will be 1D, unless `X` is a sparse matrix in which case it will\n be 2D.\n - to select multiples columns, `indices` can be one of the\n following: `list`, `array`, `slice`. The type used in\n these containers can be one of the following: `int`, 'bool' and\n `str`. However, `str` is only supported when `X` is a dataframe.\n The selected subset will be 2D.\naxis : int, default=0\n The axis along which `X` will be subsampled. `axis=0` will select\n rows while `axis=1` will select columns.\n\nReturns\n-------\nsubset\n Subset of X on axis 0 or 1.\n\nNotes\n-----\nCSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are\nnot supported." - }, - { - "name": "_get_column_indices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get feature column indices for input data X and key.\n\nFor accepted values of `key`, see the docstring of\n:func:`_safe_indexing_column`." 
- }, - { - "name": "resample", - "decorators": [], - "parameters": [ - { - "name": "*arrays", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indexable data-structures can be arrays, lists, dataframes or scipy sparse matrices with consistent first dimension." - }, - { - "name": "replace", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Implements resampling with replacement. If False, this will implement (sliced) random permutations." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to generate. If left to None this is automatically set to the first dimension of the arrays. If replace is False it should not be larger than the length of arrays." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "stratify", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "If not None, data is split in a stratified fashion, using this as the class labels." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Resample arrays or sparse matrices in a consistent way.\n\nThe default strategy implements one step of the bootstrapping\nprocedure.\n\nParameters\n----------\n*arrays : sequence of array-like of shape (n_samples,) or (n_samples, n_outputs)\n Indexable data-structures can be arrays, lists, dataframes or scipy\n sparse matrices with consistent first dimension.\n\nreplace : bool, default=True\n Implements resampling with replacement. 
If False, this will implement\n (sliced) random permutations.\n\nn_samples : int, default=None\n Number of samples to generate. If left to None this is\n automatically set to the first dimension of the arrays.\n If replace is False it should not be larger than the length of\n arrays.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for shuffling\n the data.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nstratify : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n If not None, data is split in a stratified fashion, using this as\n the class labels.\n\nReturns\n-------\nresampled_arrays : sequence of array-like of shape (n_samples,) or (n_samples, n_outputs)\n Sequence of resampled copies of the collections. The original arrays\n are not impacted.\n\nExamples\n--------\nIt is possible to mix sparse and dense arrays in the same run::\n\n >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])\n >>> y = np.array([0, 1, 2])\n\n >>> from scipy.sparse import coo_matrix\n >>> X_sparse = coo_matrix(X)\n\n >>> from sklearn.utils import resample\n >>> X, X_sparse, y = resample(X, X_sparse, y, random_state=0)\n >>> X\n array([[1., 0.],\n [2., 1.],\n [1., 0.]])\n\n >>> X_sparse\n <3x2 sparse matrix of type '<... 'numpy.float64'>'\n with 4 stored elements in Compressed Sparse Row format>\n\n >>> X_sparse.toarray()\n array([[1., 0.],\n [2., 1.],\n [1., 0.]])\n\n >>> y\n array([0, 1, 0])\n\n >>> resample(y, n_samples=2, random_state=0)\n array([0, 1])\n\nExample using stratification::\n\n >>> y = [0, 0, 1, 1, 1, 1, 1, 1, 1]\n >>> resample(y, n_samples=5, replace=False, stratify=y,\n ... 
random_state=0)\n [1, 1, 1, 0, 1]\n\nSee Also\n--------\nshuffle" - }, - { - "name": "shuffle", - "decorators": [], - "parameters": [ - { - "name": "*arrays", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Indexable data-structures can be arrays, lists, dataframes or scipy sparse matrices with consistent first dimension." - }, - { - "name": "random_state", - "type": "Optional[int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Determines random number generation for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples to generate. If left to None this is automatically set to the first dimension of the arrays. It should not be larger than the length of arrays." - } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Shuffle arrays or sparse matrices in a consistent way.\n\nThis is a convenience alias to ``resample(*arrays, replace=False)`` to do\nrandom permutations of the collections.\n\nParameters\n----------\n*arrays : sequence of indexable data-structures\n Indexable data-structures can be arrays, lists, dataframes or scipy\n sparse matrices with consistent first dimension.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for shuffling\n the data.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nn_samples : int, default=None\n Number of samples to generate. If left to None this is\n automatically set to the first dimension of the arrays. 
It should\n not be larger than the length of arrays.\n\nReturns\n-------\nshuffled_arrays : sequence of indexable data-structures\n Sequence of shuffled copies of the collections. The original arrays\n are not impacted.\n\nExamples\n--------\nIt is possible to mix sparse and dense arrays in the same run::\n\n >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])\n >>> y = np.array([0, 1, 2])\n\n >>> from scipy.sparse import coo_matrix\n >>> X_sparse = coo_matrix(X)\n\n >>> from sklearn.utils import shuffle\n >>> X, X_sparse, y = shuffle(X, X_sparse, y, random_state=0)\n >>> X\n array([[0., 0.],\n [2., 1.],\n [1., 0.]])\n\n >>> X_sparse\n <3x2 sparse matrix of type '<... 'numpy.float64'>'\n with 3 stored elements in Compressed Sparse Row format>\n\n >>> X_sparse.toarray()\n array([[0., 0.],\n [2., 1.],\n [1., 0.]])\n\n >>> y\n array([2, 1, 0])\n\n >>> shuffle(y, n_samples=2, random_state=0)\n array([0, 1])\n\nSee Also\n--------\nresample" - }, - { - "name": "safe_sqr", - "decorators": [], - "parameters": [ - { - "name": "X", - "type": "Union[NDArray, ArrayLike]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "copy", - "type": "bool", - "hasDefault": true, - "default": "True", - "limitation": null, - "ignored": false, - "docstring": "Whether to create a copy of X and operate on it or to perform inplace computation (default behaviour)." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Element wise squaring of array-likes and sparse matrices.\n\nParameters\n----------\nX : {array-like, ndarray, sparse matrix}\n\ncopy : bool, default=True\n Whether to create a copy of X and operate on it or to perform\n inplace computation (default behaviour).\n\nReturns\n-------\nX ** 2 : element wise square" - }, - { - "name": "_chunk_generator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Chunk generator, ``gen`` into lists of length ``chunksize``. The last\nchunk may have a length less than ``chunksize``." - }, - { - "name": "gen_batches", - "decorators": [], - "parameters": [ - { - "name": "n", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "batch_size", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of element in each batch." - }, - { - "name": "min_batch_size", - "type": "int", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "Minimum batch size to produce." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generator to create slices containing batch_size elements, from 0 to n.\n\nThe last slice may contain less than batch_size elements, when batch_size\ndoes not divide n.\n\nParameters\n----------\nn : int\nbatch_size : int\n Number of element in each batch.\nmin_batch_size : int, default=0\n Minimum batch size to produce.\n\nYields\n------\nslice of batch_size elements\n\nExamples\n--------\n>>> from sklearn.utils import gen_batches\n>>> list(gen_batches(7, 3))\n[slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]\n>>> list(gen_batches(6, 3))\n[slice(0, 3, None), slice(3, 6, None)]\n>>> list(gen_batches(2, 3))\n[slice(0, 2, None)]\n>>> list(gen_batches(7, 3, min_batch_size=0))\n[slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]\n>>> list(gen_batches(7, 3, min_batch_size=2))\n[slice(0, 3, None), slice(3, 7, None)]" - }, - { - "name": "gen_even_slices", - "decorators": [], - "parameters": [ - { - "name": "n", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "n_packs", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of slices to generate." - }, - { - "name": "n_samples", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of samples. Pass n_samples when the slices are to be used for sparse matrix indexing; slicing off-the-end raises an exception, while it works for NumPy arrays." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generator to create n_packs slices going up to n.\n\nParameters\n----------\nn : int\nn_packs : int\n Number of slices to generate.\nn_samples : int, default=None\n Number of samples. 
Pass n_samples when the slices are to be used for\n sparse matrix indexing; slicing off-the-end raises an exception, while\n it works for NumPy arrays.\n\nYields\n------\nslice\n\nExamples\n--------\n>>> from sklearn.utils import gen_even_slices\n>>> list(gen_even_slices(10, 1))\n[slice(0, 10, None)]\n>>> list(gen_even_slices(10, 10))\n[slice(0, 1, None), slice(1, 2, None), ..., slice(9, 10, None)]\n>>> list(gen_even_slices(10, 5))\n[slice(0, 2, None), slice(2, 4, None), ..., slice(8, 10, None)]\n>>> list(gen_even_slices(10, 3))\n[slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]" - }, - { - "name": "tosequence", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Cast iterable x to a Sequence, avoiding a copy if possible.\n\nParameters\n----------\nx : iterable" - }, - { - "name": "_to_object_array", - "decorators": [], - "parameters": [ - { - "name": "sequence", - "type": "ArrayLike", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The sequence to be converted." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Convert sequence to a 1-D NumPy array of object dtype.\n\nnumpy.array constructor has a similar use but it's output\nis ambiguous. 
It can be 1-D NumPy array of object dtype if\nthe input is a ragged array, but if the input is a list of\nequal length arrays, then the output is a 2D numpy.array.\n_to_object_array solves this ambiguity by guarantying that\nthe output is a 1-D NumPy array of objects for any input.\n\nParameters\n----------\nsequence : array-like of shape (n_elements,)\n The sequence to be converted.\n\nReturns\n-------\nout : ndarray of shape (n_elements,), dtype=object\n The converted sequence into a 1-D NumPy array of object dtype.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.utils import _to_object_array\n>>> _to_object_array([np.array([0]), np.array([1])])\narray([array([0]), array([1])], dtype=object)\n>>> _to_object_array([np.array([0]), np.array([1, 2])])\narray([array([0]), array([1, 2])], dtype=object)\n>>> _to_object_array([np.array([0]), np.array([1, 2])])\narray([array([0]), array([1, 2])], dtype=object)" - }, - { - "name": "indices_to_mask", - "decorators": [], - "parameters": [ - { - "name": "indices", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "List of integers treated as indices." - }, - { - "name": "mask_length", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Length of boolean mask to be generated. This parameter must be greater than max(indices)." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Convert list of indices to boolean mask.\n\nParameters\n----------\nindices : list-like\n List of integers treated as indices.\nmask_length : int\n Length of boolean mask to be generated.\n This parameter must be greater than max(indices).\n\nReturns\n-------\nmask : 1d boolean nd-array\n Boolean array that is True where indices are present, else False.\n\nExamples\n--------\n>>> from sklearn.utils import indices_to_mask\n>>> indices = [1, 2 , 3, 4]\n>>> indices_to_mask(indices, 5)\narray([False, True, True, True, True])" - }, - { - "name": "_message_with_time", - "decorators": [], - "parameters": [ - { - "name": "source", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "String indicating the source or the reference of the message." - }, - { - "name": "message", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Short message." - }, - { - "name": "time", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Time in seconds." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Create one line message for logging purposes.\n\nParameters\n----------\nsource : str\n String indicating the source or the reference of the message.\n\nmessage : str\n Short message.\n\ntime : int\n Time in seconds." - }, - { - "name": "_print_elapsed_time", - "decorators": [], - "parameters": [ - { - "name": "source", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "String indicating the source or the reference of the message." - }, - { - "name": "message", - "type": "str", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Short message. If None, nothing will be printed." 
- } - ], - "hasReturnType": false, - "returnType": null, - "docstring": "Log elapsed time to stdout when the context is exited.\n\nParameters\n----------\nsource : str\n String indicating the source or the reference of the message.\n\nmessage : str, default=None\n Short message. If None, nothing will be printed.\n\nReturns\n-------\ncontext_manager\n Prints elapsed time upon exit if verbose." - }, - { - "name": "get_chunk_n_rows", - "decorators": [], - "parameters": [ - { - "name": "row_bytes", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The expected number of bytes of memory that will be consumed during the processing of each row." - }, - { - "name": "max_n_rows", - "type": "int", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The maximum return value." - }, - { - "name": "working_memory", - "type": "Union[float, int]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The number of rows to fit inside this number of MiB will be returned. When None (default), the value of ``sklearn.get_config()['working_memory']`` is used." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Calculates how many rows can be processed within working_memory.\n\nParameters\n----------\nrow_bytes : int\n The expected number of bytes of memory that will be consumed\n during the processing of each row.\nmax_n_rows : int, default=None\n The maximum return value.\nworking_memory : int or float, default=None\n The number of rows to fit inside this number of MiB will be returned.\n When None (default), the value of\n ``sklearn.get_config()['working_memory']`` is used.\n\nReturns\n-------\nint or the value of n_samples\n\nWarns\n-----\nIssues a UserWarning if ``row_bytes`` exceeds ``working_memory`` MiB." 
- }, - { - "name": "is_scalar_nan", - "decorators": [], - "parameters": [ - { - "name": "x", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Tests if x is NaN.\n\nThis function is meant to overcome the issue that np.isnan does not allow\nnon-numerical types as input, and that np.nan is not float('nan').\n\nParameters\n----------\nx : any type\n\nReturns\n-------\nboolean\n\nExamples\n--------\n>>> is_scalar_nan(np.nan)\nTrue\n>>> is_scalar_nan(float(\"nan\"))\nTrue\n>>> is_scalar_nan(None)\nFalse\n>>> is_scalar_nan(\"\")\nFalse\n>>> is_scalar_nan([np.nan])\nFalse" - }, - { - "name": "_approximate_mode", - "decorators": [], - "parameters": [ - { - "name": "class_counts", - "type": "NDArray[int]", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Population per class." - }, - { - "name": "n_draws", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Number of draws (samples to draw) from the overall population." - }, - { - "name": "rng", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Used to break ties." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Computes approximate mode of multivariate hypergeometric.\n\nThis is an approximation to the mode of the multivariate\nhypergeometric given by class_counts and n_draws.\nIt shouldn't be off by more than one.\n\nIt is the mostly likely outcome of drawing n_draws many\nsamples from the population given by class_counts.\n\nParameters\n----------\nclass_counts : ndarray of int\n Population per class.\nn_draws : int\n Number of draws (samples to draw) from the overall population.\nrng : random state\n Used to break ties.\n\nReturns\n-------\nsampled_classes : ndarray of int\n Number of samples drawn from each class.\n np.sum(sampled_classes) == n_draws\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.utils import _approximate_mode\n>>> _approximate_mode(class_counts=np.array([4, 2]), n_draws=3, rng=0)\narray([2, 1])\n>>> _approximate_mode(class_counts=np.array([5, 2]), n_draws=4, rng=0)\narray([3, 1])\n>>> _approximate_mode(class_counts=np.array([2, 2, 2, 1]),\n... n_draws=2, rng=0)\narray([0, 1, 1, 0])\n>>> _approximate_mode(class_counts=np.array([2, 2, 2, 1]),\n... n_draws=2, rng=42)\narray([1, 1, 0, 0])" - }, - { - "name": "check_matplotlib_support", - "decorators": [], - "parameters": [ - { - "name": "caller_name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The name of the caller that requires matplotlib." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Raise ImportError with detailed error message if mpl is not installed.\n\nPlot utilities like :func:`plot_partial_dependence` should lazily import\nmatplotlib and call this helper before any computation.\n\nParameters\n----------\ncaller_name : str\n The name of the caller that requires matplotlib." 
- }, - { - "name": "check_pandas_support", - "decorators": [], - "parameters": [ - { - "name": "caller_name", - "type": "str", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "The name of the caller that requires pandas." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Raise ImportError with detailed error message if pandas is not\ninstalled.\n\nPlot utilities like :func:`fetch_openml` should lazily import\npandas and call this helper before any computation.\n\nParameters\n----------\ncaller_name : str\n The name of the caller that requires pandas." - }, - { - "name": "all_estimators", - "decorators": [], - "parameters": [ - { - "name": "type_filter", - "type": "Literal[\"classifier\", \"regressor\", \"cluster\", \"transformer\"]", - "hasDefault": true, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Which kind of estimators should be returned. If None, no filter is applied and all estimators are returned. Possible values are 'classifier', 'regressor', 'cluster' and 'transformer' to get estimators only of these specific types, or a list of these to get the estimators that fit at least one of the types." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get a list of all estimators from sklearn.\n\nThis function crawls the module and gets all classes that inherit\nfrom BaseEstimator. Classes that are defined in test-modules are not\nincluded.\n\nParameters\n----------\ntype_filter : {\"classifier\", \"regressor\", \"cluster\", \"transformer\"} or list of such str, default=None\n Which kind of estimators should be returned. If None, no filter is\n applied and all estimators are returned. 
Possible values are\n 'classifier', 'regressor', 'cluster' and 'transformer' to get\n estimators only of these specific types, or a list of these to\n get the estimators that fit at least one of the types.\n\nReturns\n-------\nestimators : list of tuples\n List of (name, class), where ``name`` is the class name as string\n and ``class`` is the actuall type of the class." - } - ] - }, - { - "name": "sklearn.utils.tests.conftest", - "imports": [ - "import pytest", - "import sklearn" - ], - "classes": [], - "functions": [ - { - "name": "print_changed_only_false", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_arpack", - "imports": [ - "import pytest", - "from numpy.testing import assert_allclose", - "from sklearn.utils import check_random_state", - "from sklearn.utils._arpack import _init_arpack_v0" - ], - "classes": [], - "functions": [ - { - "name": "test_init_arpack_v0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_class_weight", - "imports": [ - "import numpy as np", - "import pytest", - "from sklearn.datasets import make_blobs", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.utils.class_weight import compute_class_weight", - "from sklearn.utils.class_weight import compute_sample_weight", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_almost_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_compute_class_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_class_weight_not_present", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_compute_class_weight_dict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_class_weight_invariance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_class_weight_balanced_negative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_class_weight_balanced_unordered", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_class_weight_default", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_sample_weight_with_subsample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_sample_weight_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_compute_sample_weight_more_than_32", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_cython_blas", - "imports": [ - "import pytest", - "import numpy as np", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils._cython_blas import _dot_memview", - "from sklearn.utils._cython_blas import _asum_memview", - "from sklearn.utils._cython_blas import _axpy_memview", - "from sklearn.utils._cython_blas import _nrm2_memview", - "from sklearn.utils._cython_blas import _copy_memview", - "from sklearn.utils._cython_blas 
import _scal_memview", - "from sklearn.utils._cython_blas import _rotg_memview", - "from sklearn.utils._cython_blas import _rot_memview", - "from sklearn.utils._cython_blas import _gemv_memview", - "from sklearn.utils._cython_blas import _ger_memview", - "from sklearn.utils._cython_blas import _gemm_memview", - "from sklearn.utils._cython_blas import RowMajor", - "from sklearn.utils._cython_blas import ColMajor", - "from sklearn.utils._cython_blas import Trans", - "from sklearn.utils._cython_blas import NoTrans" - ], - "classes": [], - "functions": [ - { - "name": "_numpy_to_cython", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_no_op", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dot", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_asum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_axpy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_nrm2", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_copy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_scal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rotg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_rot", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gemv", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ger", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gemm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_deprecation", - "imports": [ - "import pickle", - "from sklearn.utils.deprecation import _is_deprecated", - "from sklearn.utils.deprecation import deprecated", - "from sklearn.utils._testing import assert_warns_message" - ], - "classes": [ - { - "name": "MockClass1", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - }, - { - "name": "MockClass2", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "MockClass3", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "MockClass4", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "mock_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { 
- "name": "test_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_is_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pickle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_encode", - "imports": [ - "import pickle", - "import numpy as np", - "import pytest", - "from numpy.testing import assert_array_equal", - "from sklearn.utils._encode import _unique", - "from sklearn.utils._encode import _encode", - "from sklearn.utils._encode import _check_unknown" - ], - "classes": [], - "functions": [ - { - "name": "test_encode_util", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_encode_with_check_unknown", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_assert_check_unknown", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_unknown", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_unknown_missing_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unique_util_missing_values_objects", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unique_util_missing_values_numeric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unique_util_with_all_missing_values", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_unknown_with_both_missing_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_estimator_checks", - "imports": [ - "import unittest", - "import sys", - "import numpy as np", - "import scipy.sparse as sp", - "import joblib", - "from sklearn.base import BaseEstimator", - "from sklearn.base import ClassifierMixin", - "from sklearn.utils import deprecated", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import MinimalClassifier", - "from sklearn.utils._testing import MinimalRegressor", - "from sklearn.utils._testing import MinimalTransformer", - "from sklearn.utils._testing import SkipTest", - "from sklearn.utils.estimator_checks import check_estimator", - "from sklearn.utils.estimator_checks import _NotAnArray", - "from sklearn.utils.estimator_checks import check_class_weight_balanced_linear_classifier", - "from sklearn.utils.estimator_checks import set_random_state", - "from sklearn.utils.estimator_checks import _set_checking_parameters", - "from sklearn.utils.estimator_checks import check_estimators_unfitted", - "from sklearn.utils.estimator_checks import check_fit_score_takes_y", - "from sklearn.utils.estimator_checks import check_no_attributes_set_in_init", - "from sklearn.utils.estimator_checks import check_classifier_data_not_an_array", - "from sklearn.utils.estimator_checks import check_regressor_data_not_an_array", - "from sklearn.utils.estimator_checks import check_estimator_get_tags_default_keys", - "from sklearn.utils.validation import check_is_fitted", - "from sklearn.utils.estimator_checks import 
check_outlier_corruption", - "from sklearn.utils.fixes import np_version", - "from sklearn.utils.fixes import parse_version", - "from sklearn.ensemble import RandomForestClassifier", - "from sklearn.linear_model import LinearRegression", - "from sklearn.linear_model import SGDClassifier", - "from sklearn.mixture import GaussianMixture", - "from sklearn.cluster import MiniBatchKMeans", - "from sklearn.decomposition import NMF", - "from sklearn.linear_model import MultiTaskElasticNet", - "from sklearn.linear_model import LogisticRegression", - "from sklearn.svm import SVC", - "from sklearn.svm import NuSVC", - "from sklearn.neighbors import KNeighborsRegressor", - "from sklearn.utils.validation import check_array", - "from sklearn.utils import all_estimators", - "from sklearn.exceptions import SkipTestWarning", - "from pandas import Series", - "from sklearn.preprocessing import LabelEncoder", - "from sklearn.utils import compute_class_weight", - "from pandas import DataFrame", - "from sklearn.datasets import load_iris" - ], - "classes": [ - { - "name": "CorrectNotFittedError", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "Exception class to raise if estimator is used before fitting.\n\nLike NotFittedError, it inherits from ValueError, but not from\nAttributeError. Used for testing only." 
- }, - { - "name": "BaseBadClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "ChangesDict", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "SetsWrongAttribute", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "ChangesWrongAttribute", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "ChangesUnderscoreAttribute", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - 
"decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "RaisesErrorInSetParams", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "HasMutableParameters", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "HasImmutableParameters", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "ModifiesValueInsteadOfRaisingError", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": 
null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "ModifiesAnotherValue", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "set_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoCheckinPredict", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoSparseClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "CorrectNotFittedErrorClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NoSampleWeightPandasSeriesType", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "BadBalancedWeightsClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "BadTransformerWithoutMixin", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NotInvariantPredict", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": 
false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "NotInvariantSampleOrder", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "LargeSparseNotSupportedClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "SparseTransformer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit_transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "EstimatorInconsistentForPandas", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": 
[], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "UntaggedBinaryClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "partial_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "TaggedBinaryClassifier", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "EstimatorMissingDefaultTags", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_get_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "RequiresPositiveYRegressor", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - 
{ - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "PoorScoreLogisticRegression", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_not_an_array_array_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_fit_score_takes_y_works_on_deprecated_fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_outlier_corruption", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_estimator_transformer_no_mixin", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_estimator_clones", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_estimators_unfitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_check_no_attributes_set_in_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_estimator_pairwise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_classifier_data_not_an_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_regressor_data_not_an_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_estimator_get_tags_default_keys", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "run_tests_without_pytest", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Runs the tests in this file without using pytest.\n " - }, - { - "name": "test_check_class_weight_balanced_linear_classifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_all_estimators_all_public", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_xfail_ignored_in_check_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_minimal_class_implementation_checks", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_estimator_html_repr", - "imports": [ - "from contextlib import closing", - "from io import StringIO", - "import pytest", - "from sklearn import config_context", - "from 
sklearn.linear_model import LogisticRegression", - "from sklearn.neural_network import MLPClassifier", - "from sklearn.impute import SimpleImputer", - "from sklearn.decomposition import PCA", - "from sklearn.decomposition import TruncatedSVD", - "from sklearn.pipeline import Pipeline", - "from sklearn.pipeline import FeatureUnion", - "from sklearn.compose import ColumnTransformer", - "from sklearn.ensemble import VotingClassifier", - "from sklearn.feature_selection import SelectPercentile", - "from sklearn.cluster import Birch", - "from sklearn.cluster import AgglomerativeClustering", - "from sklearn.preprocessing import OneHotEncoder", - "from sklearn.svm import LinearSVC", - "from sklearn.svm import LinearSVR", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.multiclass import OneVsOneClassifier", - "from sklearn.ensemble import StackingClassifier", - "from sklearn.ensemble import StackingRegressor", - "from sklearn.gaussian_process import GaussianProcessRegressor", - "from sklearn.gaussian_process.kernels import RationalQuadratic", - "from sklearn.utils._estimator_html_repr import _write_label_html", - "from sklearn.utils._estimator_html_repr import _get_visual_block", - "from sklearn.utils._estimator_html_repr import estimator_html_repr" - ], - "classes": [], - "functions": [ - { - "name": "test_write_label_html", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_visual_block_single_str_none", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_visual_block_single_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_visual_block_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_get_visual_block_feature_union", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_visual_block_voting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_visual_block_column_transformer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_estimator_html_repr_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_classsifer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stacking_regressor", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_birch_duck_typing_meta", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovo_classifier_duck_typing_meta", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_duck_typing_nested_estimator", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_one_estimator_print_change_only", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_extmath", - "imports": [ - "import numpy as np", - "from scipy import sparse", - "from scipy import linalg", - "from scipy import stats", - "from scipy.special import expit", - "import pytest", - "from sklearn.utils import gen_batches", - "from sklearn.utils._testing import assert_almost_equal", - "from sklearn.utils._testing 
import assert_allclose", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_warns", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import skip_if_32bit", - "from sklearn.utils.extmath import density", - "from sklearn.utils.extmath import _safe_accumulator_op", - "from sklearn.utils.extmath import randomized_svd", - "from sklearn.utils.extmath import row_norms", - "from sklearn.utils.extmath import weighted_mode", - "from sklearn.utils.extmath import cartesian", - "from sklearn.utils.extmath import log_logistic", - "from sklearn.utils.extmath import svd_flip", - "from sklearn.utils.extmath import _incremental_mean_and_var", - "from sklearn.utils.extmath import _incremental_weighted_mean_and_var", - "from sklearn.utils.extmath import _deterministic_vector_sign_flip", - "from sklearn.utils.extmath import softmax", - "from sklearn.utils.extmath import stable_cumsum", - "from sklearn.utils.extmath import safe_sparse_dot", - "from sklearn.datasets import make_low_rank_matrix" - ], - "classes": [], - "functions": [ - { - "name": "test_density", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_uniform_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_randomized_svd_low_rank", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_randomized_svd_low_rank_all_dtypes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_row_norms", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_randomized_svd_low_rank_with_noise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_randomized_svd_infinite_rank", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_randomized_svd_transpose_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_randomized_svd_power_iteration_normalizer", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_randomized_svd_sparse_warnings", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_svd_flip", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_randomized_svd_sign_flip", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_randomized_svd_sign_flip_with_transpose", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_cartesian", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_logistic_sigmoid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "rng", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_incremental_weighted_mean_and_variance_simple", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_weighted_mean_and_variance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_weighted_mean_and_variance_ignore_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_variance_update_formulas", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_mean_and_variance_ignore_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_variance_numerical_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incremental_variance_ddof", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_vector_sign_flip", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_softmax", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_stable_cumsum", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_sparse_dot_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_sparse_dot_nd", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_safe_sparse_dot_2d_1d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_sparse_dot_dense_output", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_fast_dict", - "imports": [ - "import numpy as np", - "from sklearn.utils._fast_dict import IntFloatDict", - "from sklearn.utils._fast_dict import argmin" - ], - "classes": [], - "functions": [ - { - "name": "test_int_float_dict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_int_float_dict_argmin", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_fixes", - "imports": [ - "import math", - "import numpy as np", - "import pytest", - "import scipy.stats", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils.fixes import _joblib_parallel_args", - "from sklearn.utils.fixes import _object_dtype_isnan", - "from sklearn.utils.fixes import loguniform", - "from sklearn.utils.fixes import MaskedArray", - "import joblib" - ], - "classes": [], - "functions": [ - { - "name": "test_joblib_parallel_args", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_object_dtype_isnan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_loguniform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_masked_array_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": 
"sklearn.utils.tests.test_metaestimators", - "imports": [ - "from sklearn.utils.metaestimators import if_delegate_has_method" - ], - "classes": [ - { - "name": "Prefix", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "MockMetaEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "This is a mock delegated function" - } - ], - "docstring": "This is a mock meta estimator" - }, - { - "name": "MetaEst", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A mock meta estimator" - }, - { - "name": "MetaEstTestTuple", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A mock meta estimator to test passing a tuple of delegates" - }, - { - "name": "MetaEstTestList", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": 
[], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A mock meta estimator to test passing a list of delegates" - }, - { - "name": "HasPredict", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": "A mock sub-estimator with predict method" - }, - { - "name": "HasNoPredict", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": "A mock sub-estimator with no predict method" - } - ], - "functions": [ - { - "name": "test_delegated_docstring", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_if_delegate_has_method", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_mocking", - "imports": [ - "import numpy as np", - "import pytest", - "from scipy import sparse", - "from numpy.testing import assert_array_equal", - "from numpy.testing import assert_allclose", - "from sklearn.datasets import load_iris", - "from sklearn.utils import check_array", - "from sklearn.utils import _safe_indexing", - "from sklearn.utils._testing import _convert_container", - "from sklearn.utils._mocking import CheckingClassifier" - ], - "classes": [], - "functions": [ - { - "name": "iris", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "_success", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "_fail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_on_fit_success", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_on_fit_fail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_X_on_predict_success", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_X_on_predict_fail", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checking_classifier", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checking_classifier_with_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checking_classifier_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checking_classifier_missing_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_checking_classifier_methods_to_check", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_multiclass", - "imports": [ - "import numpy as np", - "import scipy.sparse as sp", - "from itertools import product", - "import pytest", - "from scipy.sparse import 
issparse", - "from scipy.sparse import csc_matrix", - "from scipy.sparse import csr_matrix", - "from scipy.sparse import coo_matrix", - "from scipy.sparse import dok_matrix", - "from scipy.sparse import lil_matrix", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_array_almost_equal", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils.estimator_checks import _NotAnArray", - "from sklearn.utils.fixes import parse_version", - "from sklearn.utils.multiclass import unique_labels", - "from sklearn.utils.multiclass import is_multilabel", - "from sklearn.utils.multiclass import type_of_target", - "from sklearn.utils.multiclass import class_distribution", - "from sklearn.utils.multiclass import check_classification_targets", - "from sklearn.utils.multiclass import _ovr_decision_function", - "from sklearn.utils.metaestimators import _safe_split", - "from sklearn.model_selection import ShuffleSplit", - "from sklearn.svm import SVC", - "from sklearn import datasets" - ], - "classes": [], - "functions": [ - { - "name": "test_unique_labels", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unique_labels_non_specific", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_unique_labels_mixed_types", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_is_multilabel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_classification_targets", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_type_of_target", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": 
"Any", - "docstring": null - }, - { - "name": "test_type_of_target_pandas_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_class_distribution", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_split_with_precomputed_kernel", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ovr_decision_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_murmurhash", - "imports": [ - "import numpy as np", - "from sklearn.utils.murmurhash import murmurhash3_32", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_mmhash3_int", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mmhash3_int_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mmhash3_bytes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mmhash3_unicode", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_no_collision_on_byte_range", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_uniform_distribution", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_optimize", - "imports": [ - "import numpy as 
np", - "from sklearn.utils.optimize import _newton_cg", - "from scipy.optimize import fmin_ncg", - "from sklearn.utils._testing import assert_array_almost_equal" - ], - "classes": [], - "functions": [ - { - "name": "test_newton_cg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_parallel", - "imports": [ - "from distutils.version import LooseVersion", - "import pytest", - "from joblib import Parallel", - "import joblib", - "from numpy.testing import assert_array_equal", - "from sklearn._config import config_context", - "from sklearn._config import get_config", - "from sklearn.utils.fixes import delayed" - ], - "classes": [], - "functions": [ - { - "name": "get_working_memory", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_configuration_passes_through_to_joblib", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_pprint", - "imports": [ - "import re", - "from pprint import PrettyPrinter", - "import numpy as np", - "from sklearn.utils._pprint import _EstimatorPrettyPrinter", - "from sklearn.linear_model import LogisticRegressionCV", - "from sklearn.pipeline import make_pipeline", - "from sklearn.base import BaseEstimator", - "from sklearn.base import TransformerMixin", - "from sklearn.feature_selection import SelectKBest", - "from sklearn.feature_selection import chi2", - "from sklearn import set_config", - "from sklearn import config_context" - ], - "classes": [ - { - "name": "LogisticRegression", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "StandardScaler", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "transform", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "RFE", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "GridSearchCV", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "CountVectorizer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "Pipeline", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "SVC", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "PCA", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": 
null - }, - { - "name": "NMF", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "SimpleImputer", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_basic", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_changed_only", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_deeply_nested", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gridsearch_pipeline", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_n_max_elements_to_show", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_bruteforce_ellipsis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_builtin_prettyprinter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_kwargs_in_init", - "decorators": [], - "parameters": [], - "hasReturnType": false, 
- "returnType": "Any", - "docstring": null - }, - { - "name": "test_complexity_print_changed_only", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_random", - "imports": [ - "import numpy as np", - "import pytest", - "import scipy.sparse as sp", - "from scipy.special import comb", - "from numpy.testing import assert_array_almost_equal", - "from sklearn.utils.random import _random_choice_csc", - "from sklearn.utils.random import sample_without_replacement", - "from sklearn.utils._random import _our_rand_r_py" - ], - "classes": [], - "functions": [ - { - "name": "test_invalid_sample_without_replacement_algorithm", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_sample_without_replacement_algorithms", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_edge_case_of_sample_int", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sample_int", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_sample_int_distribution", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_choice_csc", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_random_choice_csc_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_our_rand_r", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": 
"sklearn.utils.tests.test_seq_dataset", - "imports": [ - "import numpy as np", - "import pytest", - "import scipy.sparse as sp", - "from numpy.testing import assert_array_equal", - "from sklearn.utils._seq_dataset import ArrayDataset32", - "from sklearn.utils._seq_dataset import ArrayDataset64", - "from sklearn.utils._seq_dataset import CSRDataset32", - "from sklearn.utils._seq_dataset import CSRDataset64", - "from sklearn.datasets import load_iris", - "from sklearn.utils._testing import assert_allclose" - ], - "classes": [], - "functions": [ - { - "name": "assert_csr_equal_values", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "make_dense_dataset_32", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "make_dense_dataset_64", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "make_sparse_dataset_32", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "make_sparse_dataset_64", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_seq_dataset_basic_iteration", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_seq_dataset_shuffle", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_fused_types_consistency", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_buffer_dtype_mismatch_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": 
"sklearn.utils.tests.test_shortest_path", - "imports": [ - "from collections import defaultdict", - "import numpy as np", - "from numpy.testing import assert_array_almost_equal", - "from sklearn.utils.graph import graph_shortest_path", - "from sklearn.utils.graph import single_source_shortest_path_length" - ], - "classes": [], - "functions": [ - { - "name": "floyd_warshall_slow", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "generate_graph", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_floyd_warshall", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dijkstra", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shortest_path", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_dijkstra_bug_fix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_show_versions", - "imports": [ - "from sklearn.utils._show_versions import _get_sys_info", - "from sklearn.utils._show_versions import _get_deps_info", - "from sklearn.utils._show_versions import show_versions", - "from sklearn.utils._testing import ignore_warnings" - ], - "classes": [], - "functions": [ - { - "name": "test_get_sys_info", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_deps_info", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_show_versions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_sparsefuncs", - "imports": [ - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "from scipy import linalg", - "from numpy.testing import assert_array_almost_equal", - "from numpy.testing import assert_array_equal", - "from numpy.random import RandomState", - "from sklearn.datasets import make_classification", - "from sklearn.utils.sparsefuncs import mean_variance_axis", - "from sklearn.utils.sparsefuncs import incr_mean_variance_axis", - "from sklearn.utils.sparsefuncs import inplace_column_scale", - "from sklearn.utils.sparsefuncs import inplace_row_scale", - "from sklearn.utils.sparsefuncs import inplace_swap_row", - "from sklearn.utils.sparsefuncs import inplace_swap_column", - "from sklearn.utils.sparsefuncs import min_max_axis", - "from sklearn.utils.sparsefuncs import count_nonzero", - "from sklearn.utils.sparsefuncs import csc_median_axis_0", - "from sklearn.utils.sparsefuncs_fast import assign_rows_csr", - "from sklearn.utils.sparsefuncs_fast import inplace_csr_row_normalize_l1", - "from sklearn.utils.sparsefuncs_fast import inplace_csr_row_normalize_l2", - "from sklearn.utils.sparsefuncs_fast import csr_row_norms", - "from sklearn.utils._testing import assert_allclose" - ], - "classes": [], - "functions": [ - { - "name": "test_mean_variance_axis0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_variance_axis1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incr_mean_variance_axis_weighted_axis1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incr_mean_variance_axis_weighted_axis0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - 
"docstring": null - }, - { - "name": "test_incr_mean_variance_axis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incr_mean_variance_axis_dim_mismatch", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that we raise proper error when axis=1 and the dimension mismatch.\nNon-regression test for:\nhttps://github.com/scikit-learn/scikit-learn/pull/18655" - }, - { - "name": "test_incr_mean_variance_axis_equivalence_mean_variance", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incr_mean_variance_no_new_n", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incr_mean_variance_n_float", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_incr_mean_variance_axis_ignore_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_mean_variance_illegal_axis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_densify_rows", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inplace_column_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inplace_row_scale", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inplace_swap_row", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_inplace_swap_column", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_max", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_min_max_axis_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_count_nonzero", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_csc_row_median", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_inplace_normalize", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_csr_row_norms", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_stats", - "imports": [ - "import numpy as np", - "from numpy.testing import assert_allclose", - "from pytest import approx", - "from sklearn.utils.stats import _weighted_percentile" - ], - "classes": [], - "functions": [ - { - "name": "test_weighted_percentile", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weighted_percentile_equal", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weighted_percentile_zero_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weighted_median_equal_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_weighted_median_integer_weights", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_weighted_percentile_2d", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_tags", - "imports": [ - "import pytest", - "from sklearn.base import BaseEstimator", - "from sklearn.utils._tags import _DEFAULT_TAGS", - "from sklearn.utils._tags import _safe_tags" - ], - "classes": [ - { - "name": "NoTagsEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - }, - { - "name": "MoreTagsEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "_more_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_safe_tags_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_tags_no_get_tags", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_testing", - "imports": [ - "import warnings", - "import unittest", - "import sys", - "import os", - "import atexit", - "import numpy as np", - "from scipy import sparse", - "import pytest", - "from sklearn.utils.deprecation import deprecated", - "from sklearn.utils.metaestimators import if_delegate_has_method", - "from sklearn.utils._testing import assert_raises", - "from sklearn.utils._testing 
import assert_warns", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import set_random_state", - "from sklearn.utils._testing import assert_raise_message", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import check_docstring_parameters", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import assert_raises_regex", - "from sklearn.utils._testing import TempMemmap", - "from sklearn.utils._testing import create_memmap_backed_data", - "from sklearn.utils._testing import _delete_folder", - "from sklearn.utils._testing import _convert_container", - "from sklearn.utils._testing import raises", - "from sklearn.tree import DecisionTreeClassifier", - "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis" - ], - "classes": [ - { - "name": "TestWarns", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "test_warn", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_warn_wrong_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "Klass", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "f_missing", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "f_bad_sections", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Function f\n\nParameter\n----------\na : int\n Parameter a\nb : float\n Parameter 
b\n\nResults\n-------\nc : list\n Parameter c" - } - ], - "docstring": null - }, - { - "name": "MockEst", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "MockEstimator" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "MockMetaEstimator", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "MetaEstimator to check if doctest on delegated methods work.\n\nParameters\n---------\ndelegate : estimator\n Delegated estimator." 
- }, - { - "name": "predict", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "NDArray", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter y" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "This is available only if delegate has predict.\n\nParameters\n----------\ny : ndarray\n Parameter y" - }, - { - "name": "score", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "This is available only if delegate has score.\n\nParameters\n---------\ny : ndarray\n Parameter y" - }, - { - "name": "predict_proba", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "This is available only if delegate has predict_proba.\n\nParameters\n---------\nX : ndarray\n Parameter X" - }, - { - "name": "fit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Incorrect docstring but should not be tested" - } - ], - "docstring": null - }, - { - "name": "RegistrationCounter", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "__call__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - } - ], - "functions": [ - { - "name": "test_set_random_state", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_assert_allclose_dense_sparse", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_assert_raises_msg", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_assert_raise_message", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ignore_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "f_ok", - "decorators": [], - "parameters": [ - { - "name": "a", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter a" - }, - { - "name": "b", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter b" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Function f\n\nParameters\n----------\na : int\n Parameter a\nb : float\n Parameter b\n\nReturns\n-------\nc : list\n Parameter c" - }, - { - "name": "f_bad_sections", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Function f\n\nParameters\n----------\na : int\n Parameter a\nb : float\n Parameter b\n\nResults\n-------\nc : list\n Parameter c" - }, - { - "name": "f_bad_order", - "decorators": [], - "parameters": [ - { - "name": "a", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter a" - }, - { - "name": "b", - "type": "float", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter b" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Function f\n\nParameters\n----------\na : int\n Parameter a\nb : float\n Parameter b\n\nReturns\n-------\nc : list\n Parameter c" - }, - { - "name": "f_too_many_param_docstring", - "decorators": [], - "parameters": [ - { - "name": "a", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter a" - }, - { - "name": "b", - "type": 
"int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter b" - }, - { - "name": "c", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter c" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Function f\n\nParameters\n----------\na : int\n Parameter a\nb : int\n Parameter b\nc : int\n Parameter c\n\nReturns\n-------\nd : list\n Parameter c" - }, - { - "name": "f_missing", - "decorators": [], - "parameters": [ - { - "name": "a", - "type": "int", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter a" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Function f\n\nParameters\n----------\na : int\n Parameter a\n\nReturns\n-------\nc : list\n Parameter c" - }, - { - "name": "f_check_param_definition", - "decorators": [], - "parameters": [ - { - "name": "a: int", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter a" - }, - { - "name": "b:", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter b" - }, - { - "name": "c :", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter c" - }, - { - "name": "d:int", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Parameter d" - }, - { - "name": "e", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "No typespec is allowed without colon" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Function f\n\nParameters\n----------\na: int\n Parameter a\nb:\n Parameter b\nc :\n Parameter c\nd:int\n Parameter d\ne\n No 
typespec is allowed without colon" - }, - { - "name": "test_check_docstring_parameters", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_memmap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_tempmemmap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_create_memmap_backed_data", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_convert_container", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_raises", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_utils", - "imports": [ - "from copy import copy", - "from itertools import chain", - "import warnings", - "import string", - "import timeit", - "import pytest", - "import numpy as np", - "import scipy.sparse as sp", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import assert_warns_message", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import _convert_container", - "from sklearn.utils import check_random_state", - "from sklearn.utils import _determine_key_type", - "from sklearn.utils import deprecated", - "from sklearn.utils import gen_batches", - "from sklearn.utils import _get_column_indices", - "from sklearn.utils import resample", - "from sklearn.utils import safe_mask", - "from sklearn.utils import column_or_1d", - "from sklearn.utils import _safe_indexing", - "from sklearn.utils import shuffle", - "from sklearn.utils import 
gen_even_slices", - "from sklearn.utils import _message_with_time", - "from sklearn.utils import _print_elapsed_time", - "from sklearn.utils import get_chunk_n_rows", - "from sklearn.utils import is_scalar_nan", - "from sklearn.utils import _to_object_array", - "from sklearn.utils._mocking import MockDataFrame", - "from sklearn import config_context", - "from sklearn.utils import parallel_backend", - "from sklearn.utils import register_parallel_backend", - "from sklearn.utils._joblib import joblib" - ], - "classes": [], - "functions": [ - { - "name": "test_make_rng", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gen_batches", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_deprecated", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_resample", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_resample_stratified", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_resample_stratified_replace", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_resample_stratify_2dy", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_resample_stratify_sparse_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_column_or_1d", - "decorators": [], - "parameters": [], - 
"hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_determine_key_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_determine_key_type_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_determine_key_type_slice_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_2d_container_axis_0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_1d_container", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_2d_container_axis_1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_2d_read_only_axis_1", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_1d_container_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_2d_mask", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_2d_scalar_axis_0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_1d_scalar", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_2d_scalar_axis_1", - "decorators": [], - "parameters": [], - "hasReturnType": false, 
- "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_None_axis_0", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_pandas_no_matching_cols_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_error_axis", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_1d_array_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_safe_indexing_container_axis_0_unsupported_type", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_column_indices_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_column_indices_pandas_nonunique_columns_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle_on_ndim_equals_three", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_shuffle_dont_convert_to_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_gen_even_slices", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_get_chunk_n_rows", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_message_with_time", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_print_elapsed_time", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_is_scalar_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "dummy_func", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_deprecation_joblib_api", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_to_object_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests.test_validation", - "imports": [ - "import warnings", - "import os", - "from tempfile import NamedTemporaryFile", - "from itertools import product", - "from operator import itemgetter", - "import pytest", - "from pytest import importorskip", - "import numpy as np", - "import scipy.sparse as sp", - "from sklearn.utils._testing import assert_no_warnings", - "from sklearn.utils._testing import ignore_warnings", - "from sklearn.utils._testing import SkipTest", - "from sklearn.utils._testing import assert_array_equal", - "from sklearn.utils._testing import assert_allclose_dense_sparse", - "from sklearn.utils._testing import assert_allclose", - "from sklearn.utils import as_float_array", - "from sklearn.utils import check_array", - "from sklearn.utils import check_symmetric", - "from sklearn.utils import check_X_y", - "from sklearn.utils import deprecated", - "from sklearn.utils._mocking import MockDataFrame", - "from sklearn.utils.fixes import np_version", - "from sklearn.utils.fixes import parse_version", - "from sklearn.utils.estimator_checks import _NotAnArray", - "from sklearn.random_projection import _sparse_random_matrix", - "from 
sklearn.linear_model import ARDRegression", - "from sklearn.neighbors import KNeighborsClassifier", - "from sklearn.ensemble import RandomForestRegressor", - "from sklearn.svm import SVR", - "from sklearn.datasets import make_blobs", - "from sklearn.utils import _safe_indexing", - "from sklearn.utils.validation import has_fit_parameter", - "from sklearn.utils.validation import check_is_fitted", - "from sklearn.utils.validation import check_consistent_length", - "from sklearn.utils.validation import assert_all_finite", - "from sklearn.utils.validation import check_memory", - "from sklearn.utils.validation import check_non_negative", - "from sklearn.utils.validation import _num_samples", - "from sklearn.utils.validation import check_scalar", - "from sklearn.utils.validation import _check_psd_eigenvalues", - "from sklearn.utils.validation import _deprecate_positional_args", - "from sklearn.utils.validation import _check_sample_weight", - "from sklearn.utils.validation import _allclose_dense_sparse", - "from sklearn.utils.validation import FLOAT_DTYPES", - "from sklearn.utils.validation import _check_fit_params", - "import sklearn", - "from sklearn.exceptions import NotFittedError", - "from sklearn.exceptions import PositiveSpectrumWarning", - "from sklearn.utils._testing import TempMemmap", - "import pandas as pd" - ], - "classes": [ - { - "name": "DummyMemory", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "cache", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ], - "docstring": null - }, - { - "name": "WrongDummyMemory", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - } - ], - "docstring": null - } 
- ], - "functions": [ - { - "name": "test_as_float_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_as_float_array_nan", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_np_matrix", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_memmap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_ordering", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_force_all_finite_valid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_force_all_finiteinvalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_force_all_finite_object", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_force_all_finite_object_unsafe_casting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_numeric_warns", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that check_array warns when it converts a bytes/string into a\nfloat." 
- }, - { - "name": "test_check_array_dtype_numeric_errors", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Error when string-ike array can not be converted" - }, - { - "name": "test_check_array_pandas_na_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_pandas_dtype_object_conversion", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_pandas_dtype_casting", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_on_mock_dataframe", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_dtype_stability", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_dtype_warning", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_accept_sparse_type_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_accept_sparse_no_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "X_64bit", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_accept_large_sparse_no_exception", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_accept_large_sparse_raise_exception", - "decorators": [], - 
"parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_min_samples_and_features_messages", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_complex_data_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_has_fit_parameter", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_symmetric", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_is_fitted", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_is_fitted_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_is_fitted_with_attributes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_consistent_length", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_dataframe_fit_attribute", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_suppress_validation", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_series", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_dataframe_mixed_float_dtypes", - "decorators": [], - "parameters": [], - "hasReturnType": false, - 
"returnType": "Any", - "docstring": null - }, - { - "name": "test_check_memory", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_array_memmap", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_non_negative", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_X_y_informative_error", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_retrieve_samples_from_non_standard_shape", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_scalar_valid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that check_scalar returns no error/warning if valid inputs are\nprovided" - }, - { - "name": "test_check_scalar_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Test that check_scalar returns the right error if a wrong input is\ngiven" - }, - { - "name": "test_check_psd_eigenvalues_valid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_psd_eigenvalues_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_sample_weight", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_allclose_dense_sparse_equals", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": 
"test_allclose_dense_sparse_not_equals", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_allclose_dense_sparse_raise", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_deprecate_positional_args_warns_for_function", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_deprecate_positional_args_warns_for_function_version", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_deprecate_positional_args_warns_for_class", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_fit_params", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_sparse_pandas_sp_format", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "test_check_pandas_sparse_invalid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "check that we raise an error with dataframe having\nsparse extension arrays with unsupported mixed dtype\nand pandas version below 1.1. pandas versions 1.1 and\nabove fixed this issue so no error will be raised." 
- }, - { - "name": "test_check_pandas_sparse_valid", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - } - ] - }, - { - "name": "sklearn.utils.tests", - "imports": [], - "classes": [], - "functions": [] - }, - { - "name": "sklearn._build_utils.openmp_helpers", - "imports": [ - "import os", - "import sys", - "import textwrap", - "import warnings", - "import subprocess", - "from distutils.errors import CompileError", - "from distutils.errors import LinkError", - "from pre_build_helpers import compile_test_program" - ], - "classes": [], - "functions": [ - { - "name": "get_openmp_flag", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "check_openmp_support", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check whether OpenMP test code can be compiled and run" - } - ] - }, - { - "name": "sklearn._build_utils.pre_build_helpers", - "imports": [ - "import os", - "import sys", - "import glob", - "import tempfile", - "import textwrap", - "import setuptools", - "import subprocess", - "from distutils.dist import Distribution", - "from distutils.sysconfig import customize_compiler", - "from numpy.distutils.ccompiler import new_compiler", - "from numpy.distutils.command.config_compiler import config_cc" - ], - "classes": [], - "functions": [ - { - "name": "_get_compiler", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Get a compiler equivalent to the one that will be used to build sklearn\n\nHandles compiler specified as follows:\n - python setup.py build_ext --compiler=\n - CC= python setup.py build_ext" - }, - { - "name": "compile_test_program", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that some C code can be compiled and run" - }, - { - "name": 
"basic_check_build", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check basic compilation and linking of C code" - } - ] - }, - { - "name": "sklearn._build_utils", - "imports": [ - "import os", - "import sklearn", - "import contextlib", - "from distutils.version import LooseVersion", - "from pre_build_helpers import basic_check_build", - "from openmp_helpers import check_openmp_support", - "from _min_dependencies import CYTHON_MIN_VERSION", - "import Cython", - "from Cython.Build import cythonize", - "import joblib", - "from Cython import Tempita" - ], - "classes": [], - "functions": [ - { - "name": "_check_cython_version", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": null - }, - { - "name": "cythonize_extensions", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Check that a recent Cython is available and cythonize extensions" - }, - { - "name": "gen_from_templates", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Generate cython files from a list of templates" - } - ] - }, - { - "name": "sklearn._loss.glm_distribution", - "imports": [ - "from abc import ABCMeta", - "from abc import abstractmethod", - "from collections import namedtuple", - "import numbers", - "import numpy as np", - "from scipy.special import xlogy" - ], - "classes": [ - { - "name": "ExponentialDispersionModel", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [], - "hasReturnType": false, - "returnType": "Any", - "docstring": "" - }, - { - "name": "in_y_range", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." 
- } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Returns ``True`` if y is in the valid range of Y~EDM.\n\nParameters\n----------\ny : array of shape (n_samples,)\n Target values." - }, - { - "name": "unit_variance", - "decorators": [], - "parameters": [ - { - "name": "y_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted mean." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the unit variance function.\n\nThe unit variance :math:`v(y_\\textrm{pred})` determines the variance as\na function of the mean :math:`y_\\textrm{pred}` by\n:math:`\\mathrm{Var}[Y_i] = \\phi/s_i*v(y_\\textrm{pred}_i)`.\nIt can also be derived from the unit deviance\n:math:`d(y,y_\\textrm{pred})` as\n\n.. math:: v(y_\\textrm{pred}) = \\frac{2}{\n \\frac{\\partial^2 d(y,y_\\textrm{pred})}{\n \\partialy_\\textrm{pred}^2}}\\big|_{y=y_\\textrm{pred}}\n\nSee also :func:`variance`.\n\nParameters\n----------\ny_pred : array of shape (n_samples,)\n Predicted mean." - }, - { - "name": "unit_deviance", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "y_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted mean." - }, - { - "name": "check_input", - "type": "bool", - "hasDefault": true, - "default": "False", - "limitation": null, - "ignored": false, - "docstring": "If True raise an exception on invalid y or y_pred values, otherwise they will be propagated as NaN." 
- }, - { - "name": "Returns", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "-------", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "" - }, - { - "name": "deviance: array of shape (n_samples", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Computed deviance" - }, - { - "name": ")", - "type": null, - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Computed deviance" - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the unit deviance.\n\nThe unit_deviance :math:`d(y,y_\\textrm{pred})` can be defined by the\nlog-likelihood as\n:math:`d(y,y_\\textrm{pred}) = -2\\phi\\cdot\n\\left(loglike(y,y_\\textrm{pred},\\phi) - loglike(y,y,\\phi)\\right).`\n\nParameters\n----------\ny : array of shape (n_samples,)\n Target values.\n\ny_pred : array of shape (n_samples,)\n Predicted mean.\n\ncheck_input : bool, default=False\n If True raise an exception on invalid y or y_pred values, otherwise\n they will be propagated as NaN.\nReturns\n-------\ndeviance: array of shape (n_samples,)\n Computed deviance" - }, - { - "name": "unit_deviance_derivative", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "y_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted mean." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the derivative of the unit deviance w.r.t. 
y_pred.\n\nThe derivative of the unit deviance is given by\n:math:`\\frac{\\partial}{\\partialy_\\textrm{pred}}d(y,y_\\textrm{pred})\n = -2\\frac{y-y_\\textrm{pred}}{v(y_\\textrm{pred})}`\nwith unit variance :math:`v(y_\\textrm{pred})`.\n\nParameters\n----------\ny : array of shape (n_samples,)\n Target values.\n\ny_pred : array of shape (n_samples,)\n Predicted mean." - }, - { - "name": "deviance", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." - }, - { - "name": "y_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted mean." - }, - { - "name": "weights", - "type": "Union[Array, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Weights or exposure to which variance is inverse proportional." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the deviance.\n\nThe deviance is a weighted sum of the per sample unit deviances,\n:math:`D = \\sum_i s_i \\cdot d(y_i, y_\\textrm{pred}_i)`\nwith weights :math:`s_i` and unit deviance\n:math:`d(y,y_\\textrm{pred})`.\nIn terms of the log-likelihood it is :math:`D = -2\\phi\\cdot\n\\left(loglike(y,y_\\textrm{pred},\\frac{phi}{s})\n- loglike(y,y,\\frac{phi}{s})\\right)`.\n\nParameters\n----------\ny : array of shape (n_samples,)\n Target values.\n\ny_pred : array of shape (n_samples,)\n Predicted mean.\n\nweights : {int, array of shape (n_samples,)}, default=1\n Weights or exposure to which variance is inverse proportional." - }, - { - "name": "deviance_derivative", - "decorators": [], - "parameters": [ - { - "name": "y", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Target values." 
- }, - { - "name": "y_pred", - "type": "Array", - "hasDefault": false, - "default": "None", - "limitation": null, - "ignored": false, - "docstring": "Predicted mean." - }, - { - "name": "weights", - "type": "Union[Array, int]", - "hasDefault": true, - "default": "1", - "limitation": null, - "ignored": false, - "docstring": "Weights or exposure to which variance is inverse proportional." - } - ], - "hasReturnType": false, - "returnType": "Any", - "docstring": "Compute the derivative of the deviance w.r.t. y_pred.\n\nIt gives :math:`\\frac{\\partial}{\\partial y_\\textrm{pred}}\nD(y, \\y_\\textrm{pred}; weights)`.\n\nParameters\n----------\ny : array, shape (n_samples,)\n Target values.\n\ny_pred : array, shape (n_samples,)\n Predicted mean.\n\nweights : {int, array of shape (n_samples,)}, default=1\n Weights or exposure to which variance is inverse proportional." - } - ], - "docstring": "Base class for reproductive Exponential Dispersion Models (EDM).\n\nThe pdf of :math:`Y\\sim \\mathrm{EDM}(y_\\textrm{pred}, \\phi)` is given by\n\n.. math:: p(y| \\theta, \\phi) = c(y, \\phi)\n \\exp\\left(\\frac{\\theta y-A(\\theta)}{\\phi}\\right)\n = \\tilde{c}(y, \\phi)\n \\exp\\left(-\\frac{d(y, y_\\textrm{pred})}{2\\phi}\\right)\n\nwith mean :math:`\\mathrm{E}[Y] = A'(\\theta) = y_\\textrm{pred}`,\nvariance :math:`\\mathrm{Var}[Y] = \\phi \\cdot v(y_\\textrm{pred})`,\nunit variance :math:`v(y_\\textrm{pred})` and\nunit deviance :math:`d(y,y_\\textrm{pred})`.\n\nMethods\n-------\ndeviance\ndeviance_derivative\nin_y_range\nunit_deviance\nunit_deviance_derivative\nunit_variance\n\nReferences\n----------\nhttps://en.wikipedia.org/wiki/Exponential_dispersion_model." 
- }, - { - "name": "TweedieDistribution", - "decorators": [], - "superclasses": [], - "methods": [ - { - "name": "__init__", - "decorators": [], - "parameters": [ - { - "name": "power", - "type": "float", - "hasDefault": true, - "default": "0", - "limitation": null, - "ignored": false, - "docstring": "The variance power of the `unit_variance` :math:`v(y_\\textrm{pred}) = y_\\textrm{pred}^{power}`. For ``0